Diffstat (limited to 'contrib')
518 files changed, 20295 insertions, 7661 deletions
diff --git a/contrib/llvm/include/llvm/ADT/IntEqClasses.h b/contrib/llvm/include/llvm/ADT/IntEqClasses.h index 8e75c48..0baee2f 100644 --- a/contrib/llvm/include/llvm/ADT/IntEqClasses.h +++ b/contrib/llvm/include/llvm/ADT/IntEqClasses.h @@ -53,10 +53,10 @@ public: NumClasses = 0; } - /// join - Join the equivalence classes of a and b. After joining classes, - /// findLeader(a) == findLeader(b). - /// This requires an uncompressed map. - void join(unsigned a, unsigned b); + /// Join the equivalence classes of a and b. After joining classes, + /// findLeader(a) == findLeader(b). This requires an uncompressed map. + /// Returns the new leader. + unsigned join(unsigned a, unsigned b); /// findLeader - Compute the leader of a's equivalence class. This is the /// smallest member of the class. diff --git a/contrib/llvm/include/llvm/ADT/PointerEmbeddedInt.h b/contrib/llvm/include/llvm/ADT/PointerEmbeddedInt.h new file mode 100644 index 0000000..8781d18 --- /dev/null +++ b/contrib/llvm/include/llvm/ADT/PointerEmbeddedInt.h @@ -0,0 +1,103 @@ +//===- llvm/ADT/PointerEmbeddedInt.h ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_POINTEREMBEDDEDINT_H +#define LLVM_ADT_POINTEREMBEDDEDINT_H + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Support/PointerLikeTypeTraits.h" +#include <climits> + +namespace llvm { + +/// Utility to embed an integer into a pointer-like type. This is specifically +/// intended to allow embedding integers where fewer bits are required than +/// exist in a pointer, and the integer can participate in abstractions +/// alongside other pointer-like types. For example, it can be placed into a \c +/// PointerSumType or \c PointerUnion. +/// +/// Note that much like pointers, an integer value of zero has special utility +/// due to boolean conversions. For example, a non-null value can be tested for +/// in the above abstractions without testing the particular active member. +/// Also, the default constructed value zero initializes the integer. +template <typename IntT, int Bits = sizeof(IntT) * CHAR_BIT> +class PointerEmbeddedInt { + uintptr_t Value; + + static_assert(Bits < sizeof(uintptr_t) * CHAR_BIT, + "Cannot embed more bits than we have in a pointer!"); + + enum : uintptr_t { + // We shift as many zeros into the value as we can while preserving the + // number of bits desired for the integer. + Shift = sizeof(uintptr_t) * CHAR_BIT - Bits, + + // We also want to be able to mask out the preserved bits for asserts. + Mask = static_cast<uintptr_t>(-1) << Bits + }; + + friend class PointerLikeTypeTraits<PointerEmbeddedInt>; + + explicit PointerEmbeddedInt(uintptr_t Value) : Value(Value) {} + +public: + PointerEmbeddedInt() : Value(0) {} + + PointerEmbeddedInt(IntT I) : Value(static_cast<uintptr_t>(I) << Shift) { + assert((I & Mask) == 0 && "Integer has bits outside those preserved!"); + } + + PointerEmbeddedInt &operator=(IntT I) { + assert((I & Mask) == 0 && "Integer has bits outside those preserved!"); + Value = static_cast<uintptr_t>(I) << Shift; + } + + // Note that this implicit conversion additionally allows all of the basic + // comparison operators to work transparently, etc.
+ operator IntT() const { return static_cast<IntT>(Value >> Shift); } +}; + +// Provide pointer like traits to support use with pointer unions and sum +// types. +template <typename IntT, int Bits> +class PointerLikeTypeTraits<PointerEmbeddedInt<IntT, Bits>> { + typedef PointerEmbeddedInt<IntT, Bits> T; + +public: + static inline void *getAsVoidPointer(const T &P) { + return reinterpret_cast<void *>(P.Value); + } + static inline T getFromVoidPointer(void *P) { + return T(reinterpret_cast<uintptr_t>(P)); + } + static inline T getFromVoidPointer(const void *P) { + return T(reinterpret_cast<uintptr_t>(P)); + } + + enum { NumLowBitsAvailable = T::Shift }; +}; + +// Teach DenseMap how to use PointerEmbeddedInt objects as keys if the Int type +// itself can be a key. +template <typename IntT, int Bits> +struct DenseMapInfo<PointerEmbeddedInt<IntT, Bits>> { + typedef PointerEmbeddedInt<IntT, Bits> T; + + typedef DenseMapInfo<IntT> IntInfo; + + static inline T getEmptyKey() { return IntInfo::getEmptyKey(); } + static inline T getTombstoneKey() { return IntInfo::getTombstoneKey(); } + static unsigned getHashValue(const T &Arg) { + return IntInfo::getHashValue(Arg); + } + static bool isEqual(const T &LHS, const T &RHS) { return LHS == RHS; } +}; +} + +#endif diff --git a/contrib/llvm/include/llvm/ADT/PointerIntPair.h b/contrib/llvm/include/llvm/ADT/PointerIntPair.h index 0058d85..83fbf12 100644 --- a/contrib/llvm/include/llvm/ADT/PointerIntPair.h +++ b/contrib/llvm/include/llvm/ADT/PointerIntPair.h @@ -55,20 +55,25 @@ public: PointerTy getPointer() const { return Info::getPointer(Value); } - IntType getInt() const { return (IntType)Info::getInt(Value); } + IntType getInt() const { + return (IntType)Info::getInt(Value); + } void setPointer(PointerTy PtrVal) { Value = Info::updatePointer(Value, PtrVal); } - void setInt(IntType IntVal) { Value = Info::updateInt(Value, IntVal); } + void setInt(IntType IntVal) { + Value = Info::updateInt(Value, static_cast<intptr_t>(IntVal)); + } void initWithPointer(PointerTy PtrVal) { Value = Info::updatePointer(0, PtrVal); } void setPointerAndInt(PointerTy PtrVal, IntType IntVal) { - Value = Info::updateInt(Info::updatePointer(0, PtrVal), IntVal); + Value = Info::updateInt(Info::updatePointer(0, PtrVal), + static_cast<intptr_t>(IntVal)); } PointerTy const *getAddrOfPointer() const { diff --git a/contrib/llvm/include/llvm/ADT/PointerSumType.h b/contrib/llvm/include/llvm/ADT/PointerSumType.h new file mode 100644 index 0000000..6b8618f --- /dev/null +++ b/contrib/llvm/include/llvm/ADT/PointerSumType.h @@ -0,0 +1,205 @@ +//===- llvm/ADT/PointerSumType.h --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_POINTERSUMTYPE_H +#define LLVM_ADT_POINTERSUMTYPE_H + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/PointerLikeTypeTraits.h" + +namespace llvm { + +/// A compile time pair of an integer tag and the pointer-like type which it +/// indexes within a sum type. Also allows the user to specify a particular +/// traits class for pointer types with custom behavior such as over-aligned +/// allocation. 
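The two headers added above are designed to compose: PointerEmbeddedInt gives a small integer a pointer-like interface precisely so it can appear as a member of the sum type, via the member template defined just below. A minimal usage sketch, assuming only these two headers; the Kind enum, the Count16 alias, and the bit width are invented for illustration:

#include "llvm/ADT/PointerEmbeddedInt.h"
#include "llvm/ADT/PointerSumType.h"
#include <cassert>

// Hypothetical sum type: either a pointer to an int or a small count
// embedded directly in the value. Both tags must fit in the low bits
// common to all members (2 bits here, from the alignment of int).
enum Kind { K_Ptr = 0, K_Count = 1 };
typedef llvm::PointerEmbeddedInt<unsigned, 16> Count16;
typedef llvm::PointerSumType<
    Kind, llvm::PointerSumTypeMember<K_Ptr, int *>,
    llvm::PointerSumTypeMember<K_Count, Count16>> PtrOrCount;

void example() {
  int X = 42;
  PtrOrCount A = PtrOrCount::create<K_Ptr>(&X);
  PtrOrCount B = PtrOrCount::create<K_Count>(7);
  assert(A.is<K_Ptr>() && *A.get<K_Ptr>() == 42);
  assert(B.is<K_Count>() && B.get<K_Count>() == 7); // implicit IntT conversion
}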
+template <uintptr_t N, typename PointerArgT, + typename TraitsArgT = PointerLikeTypeTraits<PointerArgT>> +struct PointerSumTypeMember { + enum { Tag = N }; + typedef PointerArgT PointerT; + typedef TraitsArgT TraitsT; +}; + +namespace detail { + +template <typename TagT, typename... MemberTs> +struct PointerSumTypeHelper; + +} + +/// A sum type over pointer-like types. +/// +/// This is a normal tagged union across pointer-like types that uses the low +/// bits of the pointers to store the tag. +/// +/// Each member of the sum type is specified by passing a \c +/// PointerSumTypeMember specialization in the variadic member argument list. +/// This allows the user to control the particular tag value associated with +/// a particular type, use the same type for multiple different tags, and +/// customize the pointer-like traits used for a particular member. Note that +/// these *must* be specializations of \c PointerSumTypeMember, no other type +/// will suffice, even if it provides a compatible interface. +/// +/// This type implements all of the comparison operators and even hash table +/// support by comparing the underlying storage of the pointer values. It +/// doesn't support delegating to particular members for comparisons. +/// +/// It also default constructs to a zero tag with a null pointer, whatever that +/// would be. This means that the zero value for the tag type is significant, +/// and it is desirable to assign it to the state you most want a +/// default-constructed value to represent. +/// +/// There is no support for constructing or accessing with a dynamic tag as +/// that would fundamentally violate the type safety provided by the sum type. +template <typename TagT, typename... MemberTs> class PointerSumType { + uintptr_t Value; + + typedef detail::PointerSumTypeHelper<TagT, MemberTs...> HelperT; + +public: + PointerSumType() : Value(0) {} + + /// A typed constructor for a specific tagged member of the sum type. + template <TagT N> + static PointerSumType + create(typename HelperT::template Lookup<N>::PointerT Pointer) { + PointerSumType Result; + void *V = HelperT::template Lookup<N>::TraitsT::getAsVoidPointer(Pointer); + assert((reinterpret_cast<uintptr_t>(V) & HelperT::TagMask) == 0 && + "Pointer is insufficiently aligned to store the discriminant!"); + Result.Value = reinterpret_cast<uintptr_t>(V) | N; + return Result; + } + + TagT getTag() const { return static_cast<TagT>(Value & HelperT::TagMask); } + + template <TagT N> bool is() const { return N == getTag(); } + + template <TagT N> typename HelperT::template Lookup<N>::PointerT get() const { + void *P = is<N>() ?
getImpl() : nullptr; + return HelperT::template Lookup<N>::TraitsT::getFromVoidPointer(P); + } + + template <TagT N> + typename HelperT::template Lookup<N>::PointerT cast() const { + assert(is<N>() && "This instance has a different active member."); + return HelperT::template Lookup<N>::TraitsT::getFromVoidPointer(getImpl()); + } + + operator bool() const { return Value & HelperT::PointerMask; } + bool operator==(const PointerSumType &R) const { return Value == R.Value; } + bool operator!=(const PointerSumType &R) const { return Value != R.Value; } + bool operator<(const PointerSumType &R) const { return Value < R.Value; } + bool operator>(const PointerSumType &R) const { return Value > R.Value; } + bool operator<=(const PointerSumType &R) const { return Value <= R.Value; } + bool operator>=(const PointerSumType &R) const { return Value >= R.Value; } + + uintptr_t getOpaqueValue() const { return Value; } + +protected: + void *getImpl() const { + return reinterpret_cast<void *>(Value & HelperT::PointerMask); + } +}; + +namespace detail { + +/// A helper template for implementing \c PointerSumType. It provides fast +/// compile-time lookup of the member from a particular tag value, along with +/// useful constants and compile-time checking infrastructure. +template <typename TagT, typename... MemberTs> +struct PointerSumTypeHelper : MemberTs... { + // First we use a trick to allow quickly looking up information about + // a particular member of the sum type. This works because we arranged to + // have this type derive from all of the member type templates. We can select + // the matching member for a tag using type deduction during overload + // resolution. + template <TagT N, typename PointerT, typename TraitsT> + static PointerSumTypeMember<N, PointerT, TraitsT> + LookupOverload(PointerSumTypeMember<N, PointerT, TraitsT> *); + template <TagT N> static void LookupOverload(...); + template <TagT N> struct Lookup { + // Compute a particular member type by resolving the lookup helper overload. + typedef decltype(LookupOverload<N>( + static_cast<PointerSumTypeHelper *>(nullptr))) MemberT; + + /// The Nth member's pointer type. + typedef typename MemberT::PointerT PointerT; + + /// The Nth member's traits type. + typedef typename MemberT::TraitsT TraitsT; + }; + + // Next we need to compute the number of bits available for the discriminant + // by taking the min of the bits available for each member. Much of this + // would be vastly easier with good constexpr support. + template <uintptr_t V, uintptr_t... Vs> + struct Min : std::integral_constant< + uintptr_t, (V < Min<Vs...>::value ? V : Min<Vs...>::value)> { + }; + template <uintptr_t V> + struct Min<V> : std::integral_constant<uintptr_t, V> {}; + enum { NumTagBits = Min<MemberTs::TraitsT::NumLowBitsAvailable...>::value }; + + // Also compute the smallest discriminant and various masks for convenience. + enum : uint64_t { + MinTag = Min<MemberTs::Tag...>::value, + PointerMask = static_cast<uint64_t>(-1) << NumTagBits, + TagMask = ~PointerMask + }; + + // Finally we need a recursive template to do static checks of each + // member. + template <typename MemberT, typename...
InnerMemberTs> + struct Checker : Checker<InnerMemberTs...> { + static_assert(MemberT::Tag < (1 << NumTagBits), + "This discriminant value requires too many bits!"); + }; + template <typename MemberT> struct Checker<MemberT> : std::true_type { + static_assert(MemberT::Tag < (1 << NumTagBits), + "This discriminant value requires too many bits!"); + }; + static_assert(Checker<MemberTs...>::value, + "Each member must pass the checker."); +}; + +} + +// Teach DenseMap how to use PointerSumTypes as keys. +template <typename TagT, typename... MemberTs> +struct DenseMapInfo<PointerSumType<TagT, MemberTs...>> { + typedef PointerSumType<TagT, MemberTs...> SumType; + + typedef detail::PointerSumTypeHelper<TagT, MemberTs...> HelperT; + enum { SomeTag = HelperT::MinTag }; + typedef typename HelperT::template Lookup<HelperT::MinTag>::PointerT + SomePointerT; + typedef DenseMapInfo<SomePointerT> SomePointerInfo; + + static inline SumType getEmptyKey() { + return SumType::create<SomeTag>(SomePointerInfo::getEmptyKey()); + } + static inline SumType getTombstoneKey() { + return SumType::create<SomeTag>( + SomePointerInfo::getTombstoneKey()); + } + static unsigned getHashValue(const SumType &Arg) { + uintptr_t OpaqueValue = Arg.getOpaqueValue(); + return DenseMapInfo<uintptr_t>::getHashValue(OpaqueValue); + } + static bool isEqual(const SumType &LHS, const SumType &RHS) { + return LHS == RHS; + } +}; + +} + +#endif diff --git a/contrib/llvm/include/llvm/ADT/Twine.h b/contrib/llvm/include/llvm/ADT/Twine.h index db0bf4b..81b1a6d 100644 --- a/contrib/llvm/include/llvm/ADT/Twine.h +++ b/contrib/llvm/include/llvm/ADT/Twine.h @@ -101,15 +101,13 @@ namespace llvm { /// A pointer to a SmallString instance. SmallStringKind, - /// A char value reinterpreted as a pointer, to render as a character. + /// A char value, to render as a character. CharKind, - /// An unsigned int value reinterpreted as a pointer, to render as an - /// unsigned decimal integer. + /// An unsigned int value, to render as an unsigned decimal integer. DecUIKind, - /// An int value reinterpreted as a pointer, to render as a signed - /// decimal integer. + /// An int value, to render as a signed decimal integer. DecIKind, /// A pointer to an unsigned long value, to render as an unsigned decimal diff --git a/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h b/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h index ef3d5e8..e02f3ab 100644 --- a/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -104,54 +104,10 @@ class LazyCallGraph { public: class Node; class SCC; + class iterator; typedef SmallVector<PointerUnion<Function *, Node *>, 4> NodeVectorT; typedef SmallVectorImpl<PointerUnion<Function *, Node *>> NodeVectorImplT; - /// A lazy iterator used for both the entry nodes and child nodes. - /// - /// When this iterator is dereferenced, if not yet available, a function will - /// be scanned for "calls" or uses of functions and its child information - /// will be constructed. All of these results are accumulated and cached in - /// the graph. - class iterator - : public iterator_adaptor_base<iterator, NodeVectorImplT::iterator, - std::forward_iterator_tag, Node> { - friend class LazyCallGraph; - friend class LazyCallGraph::Node; - - LazyCallGraph *G; - NodeVectorImplT::iterator E; - - // Build the iterator for a specific position in a node list. 
- iterator(LazyCallGraph &G, NodeVectorImplT::iterator NI, - NodeVectorImplT::iterator E) - : iterator_adaptor_base(NI), G(&G), E(E) { - while (I != E && I->isNull()) - ++I; - } - - public: - iterator() {} - - using iterator_adaptor_base::operator++; - iterator &operator++() { - do { - ++I; - } while (I != E && I->isNull()); - return *this; - } - - reference operator*() const { - if (I->is<Node *>()) - return *I->get<Node *>(); - - Function *F = I->get<Function *>(); - Node &ChildN = G->get(*F); - *I = &ChildN; - return ChildN; - } - }; - /// A node in the call graph. /// /// This represents a single node. Its primary roles are to cache the list of @@ -200,6 +156,51 @@ public: bool operator!=(const Node &N) const { return !operator==(N); } }; + /// A lazy iterator used for both the entry nodes and child nodes. + /// + /// When this iterator is dereferenced, if not yet available, a function will + /// be scanned for "calls" or uses of functions and its child information + /// will be constructed. All of these results are accumulated and cached in + /// the graph. + class iterator + : public iterator_adaptor_base<iterator, NodeVectorImplT::iterator, + std::forward_iterator_tag, Node> { + friend class LazyCallGraph; + friend class LazyCallGraph::Node; + + LazyCallGraph *G; + NodeVectorImplT::iterator E; + + // Build the iterator for a specific position in a node list. + iterator(LazyCallGraph &G, NodeVectorImplT::iterator NI, + NodeVectorImplT::iterator E) + : iterator_adaptor_base(NI), G(&G), E(E) { + while (I != E && I->isNull()) + ++I; + } + + public: + iterator() {} + + using iterator_adaptor_base::operator++; + iterator &operator++() { + do { + ++I; + } while (I != E && I->isNull()); + return *this; + } + + reference operator*() const { + if (I->is<Node *>()) + return *I->get<Node *>(); + + Function *F = I->get<Function *>(); + Node &ChildN = G->get(*F); + *I = &ChildN; + return ChildN; + } + }; + /// An SCC of the call graph. /// /// This represents a Strongly Connected Component of the call graph as diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm/include/llvm/Analysis/LoopInfo.h index c219bd8..70e636c 100644 --- a/contrib/llvm/include/llvm/Analysis/LoopInfo.h +++ b/contrib/llvm/include/llvm/Analysis/LoopInfo.h @@ -59,38 +59,37 @@ template<class N, class M> class LoopInfoBase; template<class N, class M> class LoopBase; //===----------------------------------------------------------------------===// -/// LoopBase class - Instances of this class are used to represent loops that -/// are detected in the flow graph +/// Instances of this class are used to represent loops that are detected in the +/// flow graph. /// template<class BlockT, class LoopT> class LoopBase { LoopT *ParentLoop; - // SubLoops - Loops contained entirely within this one. + // Loops contained entirely within this one. std::vector<LoopT *> SubLoops; - // Blocks - The list of blocks in this loop. First entry is the header node. + // The list of blocks in this loop. First entry is the header node. std::vector<BlockT*> Blocks; SmallPtrSet<const BlockT*, 8> DenseBlockSet; - /// Indicator that this loops has been "unlooped", so there's no loop here - /// anymore. - bool IsUnloop = false; + /// Indicator that this loop is no longer a valid loop. + bool IsInvalid = false; LoopBase(const LoopBase<BlockT, LoopT> &) = delete; const LoopBase<BlockT, LoopT>& operator=(const LoopBase<BlockT, LoopT> &) = delete; public: - /// Loop ctor - This creates an empty loop. + /// This creates an empty loop.
LoopBase() : ParentLoop(nullptr) {} ~LoopBase() { for (size_t i = 0, e = SubLoops.size(); i != e; ++i) delete SubLoops[i]; } - /// getLoopDepth - Return the nesting level of this loop. An outer-most - /// loop has depth 1, for consistency with loop depth values used for basic - /// blocks, where depth 0 is used for blocks not inside any loops. + /// Return the nesting level of this loop. An outer-most loop has depth 1, + /// for consistency with loop depth values used for basic blocks, where depth + /// 0 is used for blocks not inside any loops. unsigned getLoopDepth() const { unsigned D = 1; for (const LoopT *CurLoop = ParentLoop; CurLoop; @@ -101,33 +100,28 @@ public: BlockT *getHeader() const { return Blocks.front(); } LoopT *getParentLoop() const { return ParentLoop; } - /// setParentLoop is a raw interface for bypassing addChildLoop. + /// This is a raw interface for bypassing addChildLoop. void setParentLoop(LoopT *L) { ParentLoop = L; } - /// contains - Return true if the specified loop is contained within in - /// this loop. - /// + /// Return true if the specified loop is contained within this loop. bool contains(const LoopT *L) const { if (L == this) return true; if (!L) return false; return contains(L->getParentLoop()); } - /// contains - Return true if the specified basic block is in this loop. - /// + /// Return true if the specified basic block is in this loop. bool contains(const BlockT *BB) const { return DenseBlockSet.count(BB); } - /// contains - Return true if the specified instruction is in this loop. - /// + /// Return true if the specified instruction is in this loop. template<class InstT> bool contains(const InstT *Inst) const { return contains(Inst->getParent()); } - /// iterator/begin/end - Return the loops contained entirely within this loop. - /// + /// Return the loops contained entirely within this loop. const std::vector<LoopT *> &getSubLoops() const { return SubLoops; } std::vector<LoopT *> &getSubLoopsVector() { return SubLoops; } typedef typename std::vector<LoopT *>::const_iterator iterator; @@ -139,8 +133,7 @@ public: reverse_iterator rend() const { return SubLoops.rend(); } bool empty() const { return SubLoops.empty(); } - /// getBlocks - Get a list of the basic blocks which make up this loop. - /// + /// Get a list of the basic blocks which make up this loop. const std::vector<BlockT*> &getBlocks() const { return Blocks; } typedef typename std::vector<BlockT*>::const_iterator block_iterator; block_iterator block_begin() const { return Blocks.begin(); } @@ -149,21 +142,19 @@ public: return make_range(block_begin(), block_end()); } - /// getNumBlocks - Get the number of blocks in this loop in constant time. + /// Get the number of blocks in this loop in constant time. unsigned getNumBlocks() const { return Blocks.size(); } - /// Mark this loop as having been unlooped - the last backedge was removed and - /// we no longer have a loop. - void markUnlooped() { IsUnloop = true; } + /// Invalidate the loop, indicating that it is no longer a loop. + void invalidate() { IsInvalid = true; } - /// Return true if this no longer represents a loop. - bool isUnloop() const { return IsUnloop; } + /// Return true if this loop is no longer valid. + bool isInvalid() { return IsInvalid; } - /// isLoopExiting - True if terminator in the block can branch to another - /// block that is outside of the current loop. - /// + /// True if the terminator in the block can branch to another block that is + /// outside of the current loop.
bool isLoopExiting(const BlockT *BB) const { typedef GraphTraits<const BlockT*> BlockTraits; for (typename BlockTraits::ChildIteratorType SI = @@ -175,8 +166,7 @@ public: return false; } - /// getNumBackEdges - Calculate the number of back edges to the loop header - /// + /// Calculate the number of back edges to the loop header. unsigned getNumBackEdges() const { unsigned NumBackEdges = 0; BlockT *H = getHeader(); @@ -199,53 +189,49 @@ public: // induction variable canonicalization pass should be used to normalize loops // for easy analysis. These methods assume canonical loops. - /// getExitingBlocks - Return all blocks inside the loop that have successors - /// outside of the loop. These are the blocks _inside of the current loop_ - /// which branch out. The returned list is always unique. - /// + /// Return all blocks inside the loop that have successors outside of the + /// loop. These are the blocks _inside of the current loop_ which branch out. + /// The returned list is always unique. void getExitingBlocks(SmallVectorImpl<BlockT *> &ExitingBlocks) const; - /// getExitingBlock - If getExitingBlocks would return exactly one block, - /// return that block. Otherwise return null. + /// If getExitingBlocks would return exactly one block, return that block. + /// Otherwise return null. BlockT *getExitingBlock() const; - /// getExitBlocks - Return all of the successor blocks of this loop. These - /// are the blocks _outside of the current loop_ which are branched to. - /// + /// Return all of the successor blocks of this loop. These are the blocks + /// _outside of the current loop_ which are branched to. void getExitBlocks(SmallVectorImpl<BlockT*> &ExitBlocks) const; - /// getExitBlock - If getExitBlocks would return exactly one block, - /// return that block. Otherwise return null. + /// If getExitBlocks would return exactly one block, return that block. + /// Otherwise return null. BlockT *getExitBlock() const; /// Edge type. typedef std::pair<const BlockT*, const BlockT*> Edge; - /// getExitEdges - Return all pairs of (_inside_block_,_outside_block_). + /// Return all pairs of (_inside_block_,_outside_block_). void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const; - /// getLoopPreheader - If there is a preheader for this loop, return it. A - /// loop has a preheader if there is only one edge to the header of the loop - /// from outside of the loop. If this is the case, the block branching to the - /// header of the loop is the preheader node. + /// If there is a preheader for this loop, return it. A loop has a preheader + /// if there is only one edge to the header of the loop from outside of the + /// loop. If this is the case, the block branching to the header of the loop + /// is the preheader node. /// /// This method returns null if there is no preheader for the loop. - /// BlockT *getLoopPreheader() const; - /// getLoopPredecessor - If the given loop's header has exactly one unique - /// predecessor outside the loop, return it. Otherwise return null. - /// This is less strict that the loop "preheader" concept, which requires + /// If the given loop's header has exactly one unique predecessor outside the + /// loop, return it. Otherwise return null. + /// This is less strict than the loop "preheader" concept, which requires /// the predecessor to have exactly one successor. - /// BlockT *getLoopPredecessor() const; - /// getLoopLatch - If there is a single latch block for this loop, return it. + /// If there is a single latch block for this loop, return it.
/// A latch block is a block that contains a branch back to the header. BlockT *getLoopLatch() const; - /// getLoopLatches - Return all loop latch blocks of this loop. A latch block - /// is a block that contains a branch back to the header. + /// Return all loop latch blocks of this loop. A latch block is a block that + /// contains a branch back to the header. void getLoopLatches(SmallVectorImpl<BlockT *> &LoopLatches) const { BlockT *H = getHeader(); typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits; @@ -260,32 +246,29 @@ public: // APIs for updating loop information after changing the CFG // - /// addBasicBlockToLoop - This method is used by other analyses to update loop - /// information. NewBB is set to be a new member of the current loop. + /// This method is used by other analyses to update loop information. + /// NewBB is set to be a new member of the current loop. /// Because of this, it is added as a member of all parent loops, and is added /// to the specified LoopInfo object as being in the current basic block. It /// is not valid to replace the loop header with this method. - /// void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LI); - /// replaceChildLoopWith - This is used when splitting loops up. It replaces - /// the OldChild entry in our children list with NewChild, and updates the - /// parent pointer of OldChild to be null and the NewChild to be this loop. + /// This is used when splitting loops up. It replaces the OldChild entry in + /// our children list with NewChild, and updates the parent pointer of + /// OldChild to be null and the NewChild to be this loop. /// This updates the loop depth of the new child. void replaceChildLoopWith(LoopT *OldChild, LoopT *NewChild); - /// addChildLoop - Add the specified loop to be a child of this loop. This - /// updates the loop depth of the new child. - /// + /// Add the specified loop to be a child of this loop. + /// This updates the loop depth of the new child. void addChildLoop(LoopT *NewChild) { assert(!NewChild->ParentLoop && "NewChild already has a parent!"); NewChild->ParentLoop = static_cast<LoopT *>(this); SubLoops.push_back(NewChild); } - /// removeChildLoop - This removes the specified child from being a subloop of - /// this loop. The loop is not deleted, as it will presumably be inserted - /// into another loop. + /// This removes the specified child from being a subloop of this loop. The + /// loop is not deleted, as it will presumably be inserted into another loop. LoopT *removeChildLoop(iterator I) { assert(I != SubLoops.end() && "Cannot remove end iterator!"); LoopT *Child = *I; @@ -295,7 +278,7 @@ public: return Child; } - /// addBlockEntry - This adds a basic block directly to the basic block list. + /// This adds a basic block directly to the basic block list. /// This should only be used by transformations that create new loops. Other /// transformations should use addBasicBlockToLoop. 
void addBlockEntry(BlockT *BB) { @@ -303,19 +286,18 @@ public: DenseBlockSet.insert(BB); } - /// reverseBlocks - interface to reverse Blocks[from, end of loop] in this loop + /// interface to reverse Blocks[from, end of loop] in this loop void reverseBlock(unsigned from) { std::reverse(Blocks.begin() + from, Blocks.end()); } - /// reserveBlocks- interface to do reserve() for Blocks + /// interface to do reserve() for Blocks void reserveBlocks(unsigned size) { Blocks.reserve(size); } - /// moveToHeader - This method is used to move BB (which must be part of this - /// loop) to be the loop header of the loop (the block that dominates all - /// others). + /// This method is used to move BB (which must be part of this loop) to be the + /// loop header of the loop (the block that dominates all others). void moveToHeader(BlockT *BB) { if (Blocks[0] == BB) return; for (unsigned i = 0; ; ++i) { @@ -328,9 +310,9 @@ public: } } - /// removeBlockFromLoop - This removes the specified basic block from the - /// current loop, updating the Blocks as appropriate. This does not update - /// the mapping in the LoopInfo class. + /// This removes the specified basic block from the current loop, updating the + /// Blocks as appropriate. This does not update the mapping in the LoopInfo + /// class. void removeBlockFromLoop(BlockT *BB) { auto I = std::find(Blocks.begin(), Blocks.end(), BB); assert(I != Blocks.end() && "N is not in this list!"); @@ -339,10 +321,10 @@ public: DenseBlockSet.erase(BB); } - /// verifyLoop - Verify loop structure + /// Verify loop structure void verifyLoop() const; - /// verifyLoop - Verify loop structure of this loop and all nested loops. + /// Verify loop structure of this loop and all nested loops. void verifyLoopNest(DenseSet<const LoopT*> *Loops) const; void print(raw_ostream &OS, unsigned Depth = 0) const; @@ -368,28 +350,26 @@ class Loop : public LoopBase<BasicBlock, Loop> { public: Loop() {} - /// isLoopInvariant - Return true if the specified value is loop invariant - /// + /// Return true if the specified value is loop invariant. bool isLoopInvariant(const Value *V) const; - /// hasLoopInvariantOperands - Return true if all the operands of the - /// specified instruction are loop invariant. + /// Return true if all the operands of the specified instruction are loop + /// invariant. bool hasLoopInvariantOperands(const Instruction *I) const; - /// makeLoopInvariant - If the given value is an instruction inside of the - /// loop and it can be hoisted, do so to make it trivially loop-invariant. + /// If the given value is an instruction inside of the loop and it can be + /// hoisted, do so to make it trivially loop-invariant. /// Return true if the value after any hoisting is loop invariant. This /// function can be used as a slightly more aggressive replacement for /// isLoopInvariant. /// /// If InsertPt is specified, it is the point to hoist instructions to. /// If null, the terminator of the loop preheader is used. - /// bool makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt = nullptr) const; - /// makeLoopInvariant - If the given instruction is inside of the - /// loop and it can be hoisted, do so to make it trivially loop-invariant. + /// If the given instruction is inside of the loop and it can be hoisted, do + /// so to make it trivially loop-invariant. /// Return true if the instruction after any hoisting is loop invariant. This /// function can be used as a slightly more aggressive replacement for /// isLoopInvariant. 
@@ -400,28 +380,26 @@ public: bool makeLoopInvariant(Instruction *I, bool &Changed, Instruction *InsertPt = nullptr) const; - /// getCanonicalInductionVariable - Check to see if the loop has a canonical - /// induction variable: an integer recurrence that starts at 0 and increments - /// by one each time through the loop. If so, return the phi node that - /// corresponds to it. + /// Check to see if the loop has a canonical induction variable: an integer + /// recurrence that starts at 0 and increments by one each time through the + /// loop. If so, return the phi node that corresponds to it. /// /// The IndVarSimplify pass transforms loops to have a canonical induction /// variable. /// PHINode *getCanonicalInductionVariable() const; - /// isLCSSAForm - Return true if the Loop is in LCSSA form + /// Return true if the Loop is in LCSSA form. bool isLCSSAForm(DominatorTree &DT) const; - /// \brief Return true if this Loop and all inner subloops are in LCSSA form. + /// Return true if this Loop and all inner subloops are in LCSSA form. bool isRecursivelyLCSSAForm(DominatorTree &DT) const; - /// isLoopSimplifyForm - Return true if the Loop is in the form that - /// the LoopSimplify form transforms loops to, which is sometimes called - /// normal form. + /// Return true if the Loop is in the form that the LoopSimplify form + /// transforms loops to, which is sometimes called normal form. bool isLoopSimplifyForm() const; - /// isSafeToClone - Return true if the loop body is safe to clone in practice. + /// Return true if the loop body is safe to clone in practice. bool isSafeToClone() const; /// Returns true if the loop is annotated parallel. @@ -454,23 +432,22 @@ public: /// operand should be the node itself. void setLoopID(MDNode *LoopID) const; - /// hasDedicatedExits - Return true if no exit block for the loop - /// has a predecessor that is outside the loop. + /// Return true if no exit block for the loop has a predecessor that is + /// outside the loop. bool hasDedicatedExits() const; - /// getUniqueExitBlocks - Return all unique successor blocks of this loop. + /// Return all unique successor blocks of this loop. /// These are the blocks _outside of the current loop_ which are branched to. /// This assumes that loop exits are in canonical form. - /// void getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const; - /// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one - /// block, return that block. Otherwise return null. + /// If getUniqueExitBlocks would return exactly one block, return that block. + /// Otherwise return null. BasicBlock *getUniqueExitBlock() const; void dump() const; - /// \brief Return the debug location of the start of this loop. + /// Return the debug location of the start of this loop. /// This looks for a BB terminating instruction with a known debug /// location by looking at the preheader and header blocks. If it /// cannot find a terminating instruction with location information, @@ -498,7 +475,7 @@ private: }; //===----------------------------------------------------------------------===// -/// LoopInfo - This class builds and contains all of the top level loop +/// This class builds and contains all of the top-level loop /// structures in the specified function.
/// @@ -507,6 +484,8 @@ class LoopInfoBase { // BBMap - Mapping of basic blocks to the innermost loop they occur in DenseMap<const BlockT *, LoopT *> BBMap; std::vector<LoopT *> TopLevelLoops; + std::vector<LoopT *> RemovedLoops; + friend class LoopBase<BlockT, LoopT>; friend class LoopInfo; @@ -538,6 +517,9 @@ public: for (auto *L : TopLevelLoops) delete L; TopLevelLoops.clear(); + for (auto *L : RemovedLoops) + delete L; + RemovedLoops.clear(); } /// iterator/begin/end - The interface to the top-level loops in the current @@ -552,33 +534,30 @@ public: reverse_iterator rend() const { return TopLevelLoops.rend(); } bool empty() const { return TopLevelLoops.empty(); } - /// getLoopFor - Return the inner most loop that BB lives in. If a basic - /// block is in no loop (for example the entry node), null is returned. - /// + /// Return the innermost loop that BB lives in. If a basic block is in no + /// loop (for example the entry node), null is returned. LoopT *getLoopFor(const BlockT *BB) const { return BBMap.lookup(BB); } - /// operator[] - same as getLoopFor... - /// + /// Same as getLoopFor. const LoopT *operator[](const BlockT *BB) const { return getLoopFor(BB); } - /// getLoopDepth - Return the loop nesting level of the specified block. A - /// depth of 0 means the block is not inside any loop. - /// + /// Return the loop nesting level of the specified block. A depth of 0 means + /// the block is not inside any loop. unsigned getLoopDepth(const BlockT *BB) const { const LoopT *L = getLoopFor(BB); return L ? L->getLoopDepth() : 0; } - // isLoopHeader - True if the block is a loop header node + // True if the block is a loop header node. bool isLoopHeader(const BlockT *BB) const { const LoopT *L = getLoopFor(BB); return L && L->getHeader() == BB; } - /// removeLoop - This removes the specified top-level loop from this loop info - /// object. The loop is not deleted, as it will presumably be inserted into + /// This removes the specified top-level loop from this loop info object. + /// The loop is not deleted, as it will presumably be inserted into /// another loop. LoopT *removeLoop(iterator I) { assert(I != end() && "Cannot remove end iterator!"); @@ -588,9 +567,9 @@ public: return L; } - /// changeLoopFor - Change the top-level loop that contains BB to the - /// specified loop. This should be used by transformations that restructure - /// the loop hierarchy tree. + /// Change the top-level loop that contains BB to the specified loop. + /// This should be used by transformations that restructure the loop hierarchy + /// tree. void changeLoopFor(BlockT *BB, LoopT *L) { if (!L) { BBMap.erase(BB); @@ -599,8 +578,8 @@ public: BBMap[BB] = L; } - /// changeTopLevelLoop - Replace the specified loop in the top-level loops - /// list with the indicated loop. + /// Replace the specified loop in the top-level loops list with the indicated + /// loop. void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop) { auto I = std::find(TopLevelLoops.begin(), TopLevelLoops.end(), OldLoop); @@ -610,14 +589,13 @@ public: "Loops already embedded into a subloop!"); } - /// addTopLevelLoop - This adds the specified loop to the collection of - /// top-level loops. + /// This adds the specified loop to the collection of top-level loops.
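The mutation hooks continue below; on the query side, here is a sketch of how the accessors above are typically driven (the helper function is hypothetical, the API names are real):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"

// Report the loop nesting of every block in F; depth 0 means the block is
// outside all loops, and headers are flagged explicitly.
void printLoopNesting(llvm::Function &F, llvm::LoopInfo &LI) {
  for (llvm::BasicBlock &BB : F) {
    unsigned Depth = LI.getLoopDepth(&BB);
    if (LI.isLoopHeader(&BB))
      llvm::errs() << BB.getName() << ": loop header at depth " << Depth << "\n";
    else if (llvm::Loop *L = LI.getLoopFor(&BB))
      llvm::errs() << BB.getName() << ": in loop with header "
                   << L->getHeader()->getName() << ", depth " << Depth << "\n";
  }
}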
void addTopLevelLoop(LoopT *New) { assert(!New->getParentLoop() && "Loop already in subloop!"); TopLevelLoops.push_back(New); } - /// removeBlock - This method completely removes BB from all data structures, + /// This method completely removes BB from all data structures, /// including all of the Loop objects it is nested in and our mapping from /// BasicBlocks to loops. void removeBlock(BlockT *BB) { @@ -670,15 +648,14 @@ public: // Most of the public interface is provided via LoopInfoBase. - /// updateUnloop - Update LoopInfo after removing the last backedge from a - /// loop--now the "unloop". This updates the loop forest and parent loops for - /// each block so that Unloop is no longer referenced, but does not actually - /// delete the Unloop object. Generally, the loop pass manager should manage - /// deleting the Unloop. - void updateUnloop(Loop *Unloop); + /// Update LoopInfo after removing the last backedge from a loop. This updates + /// the loop forest and parent loops for each block so that \c L is no longer + /// referenced, but does not actually delete \c L immediately. The pointer + /// will remain valid until this LoopInfo's memory is released. + void markAsRemoved(Loop *L); - /// replacementPreservesLCSSAForm - Returns true if replacing From with To - /// everywhere is guaranteed to preserve LCSSA form. + /// Returns true if replacing From with To everywhere is guaranteed to + /// preserve LCSSA form. bool replacementPreservesLCSSAForm(Instruction *From, Value *To) { // Preserving LCSSA form is only problematic if the replacing value is an // instruction. @@ -698,8 +675,7 @@ public: return ToLoop->contains(getLoopFor(From->getParent())); } - /// \brief Checks if moving a specific instruction can break LCSSA in any - /// loop. + /// Checks if moving a specific instruction can break LCSSA in any loop. /// /// Return true if moving \p Inst to before \p NewLoc will break LCSSA, /// assuming that the function containing \p Inst and \p NewLoc is currently diff --git a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h index f5e778b..cf29fc9 100644 --- a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -259,7 +259,7 @@ public: void EmitAlignment(unsigned NumBits, const GlobalObject *GO = nullptr) const; /// Lower the specified LLVM Constant to an MCExpr. - const MCExpr *lowerConstant(const Constant *CV); + virtual const MCExpr *lowerConstant(const Constant *CV); /// \brief Print a general LLVM constant to the .s file. void EmitGlobalConstant(const DataLayout &DL, const Constant *CV); diff --git a/contrib/llvm/include/llvm/CodeGen/DIE.h b/contrib/llvm/include/llvm/CodeGen/DIE.h index fa612d9..72b3adc 100644 --- a/contrib/llvm/include/llvm/CodeGen/DIE.h +++ b/contrib/llvm/include/llvm/CodeGen/DIE.h @@ -29,6 +29,48 @@ class MCSymbol; class raw_ostream; class DwarfTypeUnit; +// AsmStreamerBase - A base abstract interface class that defines methods which +// can be implemented either to stream objects or to calculate the size of +// the streamed objects. +// The derived classes will use an AsmPrinter to implement the methods. +// +// TODO: complete this interface and use it to merge EmitValue and SizeOf +// methods in the DIE classes below.
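That TODO is easier to picture with a usage sketch: the same emit calls can be issued against either of the two implementations declared next, once to measure and once to stream. The driver below is hypothetical, and note that per the comment in the next hunk, EmittingAsmStreamer's return value is not the streamed size:

// 'AP' is whatever AsmPrinter the caller already holds.
unsigned emitWithKnownSize(const llvm::AsmPrinter *AP, uint64_t V) {
  llvm::SizeReporterAsmStreamer Sizer(AP);
  unsigned Size = Sizer.emitULEB128(V); // measures, emits nothing

  llvm::EmittingAsmStreamer Emitter(AP);
  Emitter.emitULEB128(V);               // streams the bytes for real
  return Size;
}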
+class AsmStreamerBase { +protected: + const AsmPrinter *AP; + AsmStreamerBase(const AsmPrinter *AP) : AP(AP) {} + +public: + virtual ~AsmStreamerBase() {} + virtual unsigned emitULEB128(uint64_t Value, const char *Desc = nullptr, + unsigned PadTo = 0) = 0; + virtual unsigned emitInt8(unsigned char Value) = 0; + virtual unsigned emitBytes(StringRef Data) = 0; +}; + +/// EmittingAsmStreamer - Implements AsmStreamerBase to stream objects. +/// Notice that the return value is not the actual size of the streamed object. +/// For size calculation use SizeReporterAsmStreamer. +class EmittingAsmStreamer : public AsmStreamerBase { +public: + EmittingAsmStreamer(const AsmPrinter *AP) : AsmStreamerBase(AP) {} + unsigned emitULEB128(uint64_t Value, const char *Desc = nullptr, + unsigned PadTo = 0) override; + unsigned emitInt8(unsigned char Value) override; + unsigned emitBytes(StringRef Data) override; +}; + +/// SizeReporterAsmStreamer - Only reports the size of the streamed objects. +class SizeReporterAsmStreamer : public AsmStreamerBase { +public: + SizeReporterAsmStreamer(const AsmPrinter *AP) : AsmStreamerBase(AP) {} + unsigned emitULEB128(uint64_t Value, const char *Desc = nullptr, + unsigned PadTo = 0) override; + unsigned emitInt8(unsigned char Value) override; + unsigned emitBytes(StringRef Data) override; +}; + //===--------------------------------------------------------------------===// /// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a /// Dwarf abbreviation. diff --git a/contrib/llvm/include/llvm/CodeGen/LiveInterval.h b/contrib/llvm/include/llvm/CodeGen/LiveInterval.h index 0157bf9..edade31 100644 --- a/contrib/llvm/include/llvm/CodeGen/LiveInterval.h +++ b/contrib/llvm/include/llvm/CodeGen/LiveInterval.h @@ -848,9 +848,9 @@ namespace llvm { public: explicit ConnectedVNInfoEqClasses(LiveIntervals &lis) : LIS(lis) {} - /// Classify - Classify the values in LI into connected components. - /// Return the number of connected components. - unsigned Classify(const LiveInterval *LI); + /// Classify the values in \p LR into connected components. + /// Returns the number of connected components. + unsigned Classify(const LiveRange &LR); /// getEqClass - Classify creates equivalence classes numbered 0..N. Return /// the equivalence class assigned the VNI. diff --git a/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h b/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h index 987634f..9bbdf3e 100644 --- a/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h +++ b/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h @@ -141,6 +141,28 @@ public: LLVM_DUMP_METHOD void dump(const TargetRegisterInfo &TRI) const; }; +/// List of registers defined and used by a machine instruction. +class RegisterOperands { +public: + /// List of virtual registers and register units read by the instruction. + SmallVector<unsigned, 8> Uses; + /// \brief List of virtual registers and register units defined by the + /// instruction which are not dead. + SmallVector<unsigned, 8> Defs; + /// \brief List of virtual registers and register units defined by the + /// instruction but dead. + SmallVector<unsigned, 8> DeadDefs; + + /// Analyze the given instruction \p MI and fill in the Uses, Defs and + /// DeadDefs list based on the MachineOperand flags.
+ void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI, bool IgnoreDead = false); + + /// Use liveness information to find dead defs not marked with a dead flag + /// and move them to the DeadDefs vector. + void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS); +}; + /// Array of PressureDiffs. class PressureDiffs { PressureDiff *PDiffArray; @@ -161,6 +183,10 @@ public: const PressureDiff &operator[](unsigned Idx) const { return const_cast<PressureDiffs*>(this)->operator[](Idx); } + /// \brief Record pressure difference induced by the given operand list to + /// node with index \p Idx. + void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, + const MachineRegisterInfo &MRI); }; /// Store the effects of a change in pressure on things that MI scheduler cares @@ -329,8 +355,17 @@ public: void setPos(MachineBasicBlock::const_iterator Pos) { CurrPos = Pos; } /// Recede across the previous instruction. - void recede(SmallVectorImpl<unsigned> *LiveUses = nullptr, - PressureDiff *PDiff = nullptr); + void recede(SmallVectorImpl<unsigned> *LiveUses = nullptr); + + /// Recede across the previous instruction. + /// This "low-level" variant assumes that recedeSkipDebugValues() was + /// called previously and takes precomputed RegisterOperands for the + /// instruction. + void recede(const RegisterOperands &RegOpers, + SmallVectorImpl<unsigned> *LiveUses = nullptr); + + /// Recede until we find an instruction which is not a DebugValue. + void recedeSkipDebugValues(); /// Advance across the current instruction. void advance(); diff --git a/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h b/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h index f6ad7a8..46c1029 100644 --- a/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h @@ -93,8 +93,6 @@ struct WinEHFuncInfo { DenseMap<const Instruction *, int> EHPadStateMap; DenseMap<const FuncletPadInst *, int> FuncletBaseStateMap; DenseMap<const InvokeInst *, int> InvokeStateMap; - DenseMap<const CatchReturnInst *, const BasicBlock *> - CatchRetSuccessorColorMap; DenseMap<MCSymbol *, std::pair<int, MCSymbol *>> LabelToStateMap; SmallVector<CxxUnwindMapEntry, 4> CxxUnwindMap; SmallVector<WinEHTryBlockMapEntry, 4> TryBlockMap; @@ -125,8 +123,5 @@ void calculateSEHStateNumbers(const Function *ParentFn, WinEHFuncInfo &FuncInfo); void calculateClrEHStateNumbers(const Function *Fn, WinEHFuncInfo &FuncInfo); - -void calculateCatchReturnSuccessorColors(const Function *Fn, - WinEHFuncInfo &FuncInfo); } #endif // LLVM_CODEGEN_WINEHFUNCINFO_H diff --git a/contrib/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/contrib/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h index 0703fb1..3098199 100644 --- a/contrib/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h +++ b/contrib/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h @@ -28,13 +28,16 @@ class DIPrinter { raw_ostream &OS; bool PrintFunctionNames; bool PrintPretty; - void printName(const DILineInfo &Info, bool Inlined); + int PrintSourceContext; + + void print(const DILineInfo &Info, bool Inlined); + void printContext(std::string FileName, int64_t Line); public: DIPrinter(raw_ostream &OS, bool PrintFunctionNames = true, - bool PrintPretty = false) + bool PrintPretty = false, int PrintSourceContext = 0) : OS(OS), PrintFunctionNames(PrintFunctionNames), - PrintPretty(PrintPretty) {} + PrintPretty(PrintPretty), PrintSourceContext(PrintSourceContext) {} DIPrinter &operator<<(const 
DILineInfo &Info); DIPrinter &operator<<(const DIInliningInfo &Info); diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 7dab5d1..84af472 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -19,7 +19,6 @@ #include "LambdaResolver.h" #include "LogicalDylib.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/Transforms/Utils/Cloning.h" #include <list> #include <memory> @@ -61,31 +60,36 @@ private: typedef typename BaseLayerT::ModuleSetHandleT BaseLayerModuleSetHandleT; - class ModuleOwner { + // Provide type-erasure for the Modules and MemoryManagers. + template <typename ResourceT> + class ResourceOwner { public: - ModuleOwner() = default; - ModuleOwner(const ModuleOwner&) = delete; - ModuleOwner& operator=(const ModuleOwner&) = delete; - virtual ~ModuleOwner() { } - virtual Module& getModule() const = 0; + ResourceOwner() = default; + ResourceOwner(const ResourceOwner&) = delete; + ResourceOwner& operator=(const ResourceOwner&) = delete; + virtual ~ResourceOwner() { } + virtual ResourceT& getResource() const = 0; }; - template <typename ModulePtrT> - class ModuleOwnerImpl : public ModuleOwner { + template <typename ResourceT, typename ResourcePtrT> + class ResourceOwnerImpl : public ResourceOwner<ResourceT> { public: - ModuleOwnerImpl(ModulePtrT ModulePtr) : ModulePtr(std::move(ModulePtr)) {} - Module& getModule() const override { return *ModulePtr; } + ResourceOwnerImpl(ResourcePtrT ResourcePtr) + : ResourcePtr(std::move(ResourcePtr)) {} + ResourceT& getResource() const override { return *ResourcePtr; } private: - ModulePtrT ModulePtr; + ResourcePtrT ResourcePtr; }; - template <typename ModulePtrT> - std::unique_ptr<ModuleOwner> wrapOwnership(ModulePtrT ModulePtr) { - return llvm::make_unique<ModuleOwnerImpl<ModulePtrT>>(std::move(ModulePtr)); + template <typename ResourceT, typename ResourcePtrT> + std::unique_ptr<ResourceOwner<ResourceT>> + wrapOwnership(ResourcePtrT ResourcePtr) { + typedef ResourceOwnerImpl<ResourceT, ResourcePtrT> RO; + return llvm::make_unique<RO>(std::move(ResourcePtr)); } struct LogicalModuleResources { - std::unique_ptr<ModuleOwner> SourceModuleOwner; + std::unique_ptr<ResourceOwner<Module>> SourceModule; std::set<const Function*> StubsToClone; std::unique_ptr<IndirectStubsMgrT> StubsMgr; @@ -93,15 +97,16 @@ private: // Explicit move constructor to make MSVC happy. LogicalModuleResources(LogicalModuleResources &&Other) - : SourceModuleOwner(std::move(Other.SourceModuleOwner)), + : SourceModule(std::move(Other.SourceModule)), StubsToClone(std::move(Other.StubsToClone)), StubsMgr(std::move(Other.StubsMgr)) {} // Explicit move assignment to make MSVC happy. 
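That move-assignment operator continues in the next hunk. Stepping back, the ResourceOwner refactor above is ordinary ownership type-erasure, generalized from the old ModuleOwner so one wrapper can hold modules and memory managers alike. A standalone restatement of the idiom, with invented names (OwnerBase, OwnerImpl, and wrap are not part of the patch):

#include <memory>
#include <utility>

// Hide any owning pointer type (unique_ptr, shared_ptr, ...) behind one
// interface that only hands out the resource by reference.
template <typename ResourceT> class OwnerBase {
public:
  virtual ~OwnerBase() = default;
  virtual ResourceT &get() const = 0;
};

template <typename ResourceT, typename PtrT>
class OwnerImpl : public OwnerBase<ResourceT> {
  PtrT Ptr;

public:
  OwnerImpl(PtrT P) : Ptr(std::move(P)) {}
  ResourceT &get() const override { return *Ptr; }
};

template <typename ResourceT, typename PtrT>
std::unique_ptr<OwnerBase<ResourceT>> wrap(PtrT P) {
  return std::unique_ptr<OwnerBase<ResourceT>>(
      new OwnerImpl<ResourceT, PtrT>(std::move(P)));
}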
LogicalModuleResources& operator=(LogicalModuleResources &&Other) { - SourceModuleOwner = std::move(Other.SourceModuleOwner); + SourceModule = std::move(Other.SourceModule); StubsToClone = std::move(Other.StubsToClone); StubsMgr = std::move(Other.StubsMgr); + return *this; } JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) { @@ -114,12 +119,35 @@ private: }; - - struct LogicalDylibResources { typedef std::function<RuntimeDyld::SymbolInfo(const std::string&)> SymbolResolverFtor; + + typedef std::function<typename BaseLayerT::ModuleSetHandleT( + BaseLayerT&, + std::unique_ptr<Module>, + std::unique_ptr<RuntimeDyld::SymbolResolver>)> + ModuleAdderFtor; + + LogicalDylibResources() = default; + + // Explicit move constructor to make MSVC happy. + LogicalDylibResources(LogicalDylibResources &&Other) + : ExternalSymbolResolver(std::move(Other.ExternalSymbolResolver)), + MemMgr(std::move(Other.MemMgr)), + ModuleAdder(std::move(Other.ModuleAdder)) {} + + // Explicit move assignment operator to make MSVC happy. + LogicalDylibResources& operator=(LogicalDylibResources &&Other) { + ExternalSymbolResolver = std::move(Other.ExternalSymbolResolver); + MemMgr = std::move(Other.MemMgr); + ModuleAdder = std::move(Other.ModuleAdder); + return *this; + } + SymbolResolverFtor ExternalSymbolResolver; + std::unique_ptr<ResourceOwner<RuntimeDyld::MemoryManager>> MemMgr; + ModuleAdderFtor ModuleAdder; }; typedef LogicalDylib<BaseLayerT, LogicalModuleResources, @@ -157,9 +185,6 @@ public: MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver) { - assert(MemMgr == nullptr && - "User supplied memory managers not supported with COD yet."); - LogicalDylibs.push_back(CODLogicalDylib(BaseLayer)); auto &LDResources = LogicalDylibs.back().getDylibResources(); @@ -168,6 +193,18 @@ public: return Resolver->findSymbol(Name); }; + auto &MemMgrRef = *MemMgr; + LDResources.MemMgr = + wrapOwnership<RuntimeDyld::MemoryManager>(std::move(MemMgr)); + + LDResources.ModuleAdder = + [&MemMgrRef](BaseLayerT &B, std::unique_ptr<Module> M, + std::unique_ptr<RuntimeDyld::SymbolResolver> R) { + std::vector<std::unique_ptr<Module>> Ms; + Ms.push_back(std::move(M)); + return B.addModuleSet(std::move(Ms), &MemMgrRef, std::move(R)); + }; + // Process each of the modules in this module set. for (auto &M : Ms) addLogicalModule(LogicalDylibs.back(), std::move(M)); @@ -215,9 +252,9 @@ private: auto LMH = LD.createLogicalModule(); auto &LMResources = LD.getLogicalModuleResources(LMH); - LMResources.SourceModuleOwner = wrapOwnership(std::move(SrcMPtr)); + LMResources.SourceModule = wrapOwnership<Module>(std::move(SrcMPtr)); - Module &SrcM = LMResources.SourceModuleOwner->getModule(); + Module &SrcM = LMResources.SourceModule->getResource(); // Create the GlobalValues module. const DataLayout &DL = SrcM.getDataLayout(); @@ -326,12 +363,9 @@ private: return RuntimeDyld::SymbolInfo(nullptr); }); - std::vector<std::unique_ptr<Module>> GVsMSet; - GVsMSet.push_back(std::move(GVsM)); auto GVsH = - BaseLayer.addModuleSet(std::move(GVsMSet), - llvm::make_unique<SectionMemoryManager>(), - std::move(GVsResolver)); + LD.getDylibResources().ModuleAdder(BaseLayer, std::move(GVsM), + std::move(GVsResolver)); LD.addToLogicalModule(LMH, GVsH); } @@ -348,7 +382,7 @@ private: LogicalModuleHandle LMH, Function &F) { auto &LMResources = LD.getLogicalModuleResources(LMH); - Module &SrcM = LMResources.SourceModuleOwner->getModule(); + Module &SrcM = LMResources.SourceModule->getResource(); // If F is a declaration we must already have compiled it. 
if (F.isDeclaration()) @@ -386,7 +420,7 @@ private: LogicalModuleHandle LMH, const PartitionT &Part) { auto &LMResources = LD.getLogicalModuleResources(LMH); - Module &SrcM = LMResources.SourceModuleOwner->getModule(); + Module &SrcM = LMResources.SourceModule->getResource(); // Create the module. std::string NewName = SrcM.getName(); @@ -445,7 +479,6 @@ private: moveFunctionBody(*F, VMap, &Materializer); // Create memory manager and symbol resolver. - auto MemMgr = llvm::make_unique<SectionMemoryManager>(); auto Resolver = createLambdaResolver( [this, &LD, LMH](const std::string &Name) { if (auto Symbol = LD.findSymbolInternally(LMH, Name)) @@ -459,10 +492,9 @@ private: Symbol.getFlags()); return RuntimeDyld::SymbolInfo(nullptr); }); - std::vector<std::unique_ptr<Module>> PartMSet; - PartMSet.push_back(std::move(M)); - return BaseLayer.addModuleSet(std::move(PartMSet), std::move(MemMgr), - std::move(Resolver)); + + return LD.getDylibResources().ModuleAdder(BaseLayer, std::move(M), + std::move(Resolver)); } BaseLayerT &BaseLayer; diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index d6ee3a8..e17630f 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -22,6 +22,7 @@ #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Support/Process.h" #include <sstream> namespace llvm { @@ -179,14 +180,15 @@ private: std::error_code EC; auto TrampolineBlock = sys::OwningMemoryBlock( - sys::Memory::allocateMappedMemory(TargetT::PageSize, nullptr, + sys::Memory::allocateMappedMemory(sys::Process::getPageSize(), nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); assert(!EC && "Failed to allocate trampoline block"); unsigned NumTrampolines = - (TargetT::PageSize - TargetT::PointerSize) / TargetT::TrampolineSize; + (sys::Process::getPageSize() - TargetT::PointerSize) / + TargetT::TrampolineSize; uint8_t *TrampolineMem = static_cast<uint8_t*>(TrampolineBlock.base()); TargetT::writeTrampolines(TrampolineMem, ResolverBlock.base(), @@ -240,8 +242,8 @@ private: virtual void anchor(); }; -/// @brief IndirectStubsManager implementation for a concrete target, e.g. -/// OrcX86_64. (See OrcTargetSupport.h). +/// @brief IndirectStubsManager implementation for the host architecture, e.g. +/// OrcX86_64. (See OrcArchitectureSupport.h). 
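Before that manager's definition, one detail of the trampoline pool above is worth making concrete: replacing TargetT::PageSize with sys::Process::getPageSize() turns the trampoline count into a run-time quantity. With the OrcX86_64 constants from the renamed header below and an assumed 4096-byte page, the arithmetic is:

// Assumptions: 4 KiB pages; one resolver pointer (8 bytes) is reserved in
// the block and the rest is filled with 8-byte trampolines.
unsigned PageSize = 4096;                      // sys::Process::getPageSize()
unsigned PointerSize = 8, TrampolineSize = 8;  // OrcX86_64 constants
unsigned NumTrampolines =
    (PageSize - PointerSize) / TrampolineSize; // (4096 - 8) / 8 = 511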
template <typename TargetT> class LocalIndirectStubsManager : public IndirectStubsManager { public: diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index 2acfecf..4dc48f1 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -108,9 +108,7 @@ private: void Finalize() override { State = Finalizing; - RTDyld->resolveRelocations(); - RTDyld->registerEHFrames(); - MemMgr->finalizeMemory(); + RTDyld->finalizeWithMemoryManagerLocking(); State = Finalized; } diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h index 246d3e0..1b0488b 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h @@ -1,4 +1,4 @@ -//===-- OrcTargetSupport.h - Code to support specific targets --*- C++ -*-===// +//===-- OrcArchitectureSupport.h - Architecture support code ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,32 +7,76 @@ // -// Target specific code for Orc, e.g. callback assembly. +// Architecture specific code for Orc, e.g. callback assembly. // -// Target classes should be part of the JIT *target* process, not the host +// Architecture classes should be part of the JIT *target* process, not the host // process (except where you're doing hosted JITing and the two are one and the // same). // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H -#define LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H +#ifndef LLVM_EXECUTIONENGINE_ORC_ORCARCHITECTURESUPPORT_H +#define LLVM_EXECUTIONENGINE_ORC_ORCARCHITECTURESUPPORT_H #include "IndirectionUtils.h" #include "llvm/Support/Memory.h" +#include "llvm/Support/Process.h" namespace llvm { namespace orc { +/// Generic ORC Architecture support. +/// +/// This class can be substituted as the target architecture support class for +/// ORC templates that require one (e.g. IndirectStubsManagers). It does not +/// support lazy JITing however, and any attempt to use that functionality +/// will result in execution of an llvm_unreachable.
+class OrcGenericArchitecture { +public: + static const unsigned PointerSize = sizeof(uintptr_t); + static const unsigned TrampolineSize = 1; + static const unsigned ResolverCodeSize = 1; + + typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId); + + static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, + void *CallbackMgr) { + llvm_unreachable("writeResolverCode is not supported by the generic host " + "support class"); + } + + static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + unsigned NumTrampolines) { + llvm_unreachable("writeTrampolines is not supported by the generic host " + "support class"); + } + + class IndirectStubsInfo { + public: + const static unsigned StubSize = 1; + unsigned getNumStubs() const { llvm_unreachable("Not supported"); } + void *getStub(unsigned Idx) const { llvm_unreachable("Not supported"); } + void **getPtr(unsigned Idx) const { llvm_unreachable("Not supported"); } + }; + + static std::error_code emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, + unsigned MinStubs, + void *InitialPtrVal) { + llvm_unreachable("emitIndirectStubsBlock is not supported by the generic " + "host support class"); + } +}; + +/// @brief X86_64 support. +/// +/// X86_64 supports lazy JITing. class OrcX86_64 { public: - static const unsigned PageSize = 4096; static const unsigned PointerSize = 8; static const unsigned TrampolineSize = 8; static const unsigned ResolverCodeSize = 0x78; - typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, - void *TrampolineId); + typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId); /// @brief Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. @@ -49,16 +93,16 @@ public: /// makeIndirectStubsBlock function. class IndirectStubsInfo { friend class OrcX86_64; + public: const static unsigned StubSize = 8; - const static unsigned PtrSize = 8; IndirectStubsInfo() : NumStubs(0) {} IndirectStubsInfo(IndirectStubsInfo &&Other) : NumStubs(Other.NumStubs), StubsMem(std::move(Other.StubsMem)) { Other.NumStubs = 0; } - IndirectStubsInfo& operator=(IndirectStubsInfo &&Other) { + IndirectStubsInfo &operator=(IndirectStubsInfo &&Other) { NumStubs = Other.NumStubs; Other.NumStubs = 0; StubsMem = std::move(Other.StubsMem); @@ -70,17 +114,18 @@ public: /// @brief Get a pointer to the stub at the given index, which must be in /// the range 0 .. getNumStubs() - 1. - void* getStub(unsigned Idx) const { - return static_cast<uint64_t*>(StubsMem.base()) + Idx; + void *getStub(unsigned Idx) const { + return static_cast<uint64_t *>(StubsMem.base()) + Idx; } /// @brief Get a pointer to the implementation-pointer at the given index, /// which must be in the range 0 .. getNumStubs() - 1. - void** getPtr(unsigned Idx) const { + void **getPtr(unsigned Idx) const { char *PtrsBase = - static_cast<char*>(StubsMem.base()) + NumStubs * StubSize; - return reinterpret_cast<void**>(PtrsBase) + Idx; + static_cast<char *>(StubsMem.base()) + NumStubs * StubSize; + return reinterpret_cast<void **>(PtrsBase) + Idx; } + private: unsigned NumStubs; sys::OwningMemoryBlock StubsMem; @@ -100,4 +145,4 @@ public: } // End namespace orc. } // End namespace llvm.
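The architecture class is consumed as a compile-time parameter by the indirection utilities in this patch. A minimal usage sketch (not part of the patch itself; it assumes only the interfaces visible in this diff, with error handling elided):

  #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
  #include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"

  using namespace llvm;
  using namespace llvm::orc;

  // Build one exported stub named "foo" pointing at Impl. OrcX86_64 supplies
  // the stub layout and emission; swapping in OrcGenericArchitecture would
  // compile, but emitting stubs would hit llvm_unreachable, as documented.
  JITSymbol makeStubFor(TargetAddress Impl) {
    static LocalIndirectStubsManager<OrcX86_64> StubsMgr;
    if (auto EC = StubsMgr.createStub("foo", Impl, JITSymbolFlags::Exported))
      return nullptr; // Real code would propagate EC to the caller.
    return StubsMgr.findStub("foo", /*ExportedStubsOnly=*/true);
  }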
-#endif // LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H +#endif // LLVM_EXECUTIONENGINE_ORC_ORCARCHITECTURESUPPORT_H diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h new file mode 100644 index 0000000..48f35d6 --- /dev/null +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h @@ -0,0 +1,37 @@ +//===------ OrcError.h - Error codes and helpers for Orc ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Define an error category, error codes, and helper utilities for Orc. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_ORCERROR_H +#define LLVM_EXECUTIONENGINE_ORC_ORCERROR_H + +#include <system_error> + +namespace llvm { +namespace orc { + +enum class OrcErrorCode : int { + // RPC Errors + RemoteAllocatorDoesNotExist = 1, + RemoteAllocatorIdAlreadyInUse, + RemoteMProtectAddrUnrecognized, + RemoteIndirectStubsOwnerDoesNotExist, + RemoteIndirectStubsOwnerIdAlreadyInUse, + UnexpectedRPCCall +}; + +std::error_code orcError(OrcErrorCode ErrCode); + +} // End namespace orc. +} // End namespace llvm. + +#endif // LLVM_EXECUTIONENGINE_ORC_ORCERROR_H diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h new file mode 100644 index 0000000..d7640b8 --- /dev/null +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h @@ -0,0 +1,784 @@ +//===---- OrcRemoteTargetClient.h - Orc Remote-target Client ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the OrcRemoteTargetClient class and helpers. This class +// can be used to communicate over an RPCChannel with an OrcRemoteTargetServer +// instance to support remote-JITing. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H +#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H + +#include "IndirectionUtils.h" +#include "OrcRemoteTargetRPCAPI.h" +#include <system_error> + +#define DEBUG_TYPE "orc-remote" + +namespace llvm { +namespace orc { +namespace remote { + +/// This class provides utilities (including memory manager, indirect stubs +/// manager, and compile callback manager types) that support remote JITing +/// in ORC. +/// +/// Each of the utility classes talks to a JIT server (an instance of the +/// OrcRemoteTargetServer class) via an RPC system (see RPCUtils.h) to carry out +/// its actions. +template <typename ChannelT> +class OrcRemoteTargetClient : public OrcRemoteTargetRPCAPI { +public: + /// Remote memory manager.
+ class RCMemoryManager : public RuntimeDyld::MemoryManager { + public: + RCMemoryManager(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id) + : Client(Client), Id(Id) { + DEBUG(dbgs() << "Created remote allocator " << Id << "\n"); + } + + RCMemoryManager(RCMemoryManager &&Other) + : Client(std::move(Other.Client)), Id(std::move(Other.Id)), + Unmapped(std::move(Other.Unmapped)), + Unfinalized(std::move(Other.Unfinalized)) {} + + RCMemoryManager &operator=(RCMemoryManager &&Other) { + Client = std::move(Other.Client); + Id = std::move(Other.Id); + Unmapped = std::move(Other.Unmapped); + Unfinalized = std::move(Other.Unfinalized); + return *this; + } + + ~RCMemoryManager() { + Client.destroyRemoteAllocator(Id); + DEBUG(dbgs() << "Destroyed remote allocator " << Id << "\n"); + } + + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + StringRef SectionName) override { + Unmapped.back().CodeAllocs.emplace_back(Size, Alignment); + uint8_t *Alloc = reinterpret_cast<uint8_t *>( + Unmapped.back().CodeAllocs.back().getLocalAddress()); + DEBUG(dbgs() << "Allocator " << Id << " allocated code for " + << SectionName << ": " << Alloc << " (" << Size + << " bytes, alignment " << Alignment << ")\n"); + return Alloc; + } + + uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, StringRef SectionName, + bool IsReadOnly) override { + if (IsReadOnly) { + Unmapped.back().RODataAllocs.emplace_back(Size, Alignment); + uint8_t *Alloc = reinterpret_cast<uint8_t *>( + Unmapped.back().RODataAllocs.back().getLocalAddress()); + DEBUG(dbgs() << "Allocator " << Id << " allocated ro-data for " + << SectionName << ": " << Alloc << " (" << Size + << " bytes, alignment " << Alignment << ")\n"); + return Alloc; + } // else... + + Unmapped.back().RWDataAllocs.emplace_back(Size, Alignment); + uint8_t *Alloc = reinterpret_cast<uint8_t *>( + Unmapped.back().RWDataAllocs.back().getLocalAddress()); + DEBUG(dbgs() << "Allocator " << Id << " allocated rw-data for " + << SectionName << ": " << Alloc << " (" << Size + << " bytes, alignment " << Alignment << ")\n"); + return Alloc; + } + + void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, + uintptr_t RODataSize, uint32_t RODataAlign, + uintptr_t RWDataSize, + uint32_t RWDataAlign) override { + Unmapped.push_back(ObjectAllocs()); + + DEBUG(dbgs() << "Allocator " << Id << " reserved:\n"); + + if (CodeSize != 0) { + std::error_code EC = Client.reserveMem(Unmapped.back().RemoteCodeAddr, + Id, CodeSize, CodeAlign); + // FIXME: Add error to poll. + assert(!EC && "Failed reserving remote memory."); + (void)EC; + DEBUG(dbgs() << " code: " + << format("0x%016x", Unmapped.back().RemoteCodeAddr) + << " (" << CodeSize << " bytes, alignment " << CodeAlign + << ")\n"); + } + + if (RODataSize != 0) { + std::error_code EC = Client.reserveMem(Unmapped.back().RemoteRODataAddr, + Id, RODataSize, RODataAlign); + // FIXME: Add error to poll. + assert(!EC && "Failed reserving remote memory."); + (void)EC; + DEBUG(dbgs() << " ro-data: " + << format("0x%016x", Unmapped.back().RemoteRODataAddr) + << " (" << RODataSize << " bytes, alignment " + << RODataAlign << ")\n"); + } + + if (RWDataSize != 0) { + std::error_code EC = Client.reserveMem(Unmapped.back().RemoteRWDataAddr, + Id, RWDataSize, RWDataAlign); + // FIXME: Add error to poll.
+ assert(!EC && "Failed reserving remote memory."); + (void)EC; + DEBUG(dbgs() << " rw-data: " + << format("0x%016x", Unmapped.back().RemoteRWDataAddr) + << " (" << RWDataSize << " bytes, alignment " + << RWDataAlign << ")\n"); + } + } + + bool needsToReserveAllocationSpace() override { return true; } + + void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, + size_t Size) override {} + + void deregisterEHFrames(uint8_t *addr, uint64_t LoadAddr, + size_t Size) override {} + + void notifyObjectLoaded(RuntimeDyld &Dyld, + const object::ObjectFile &Obj) override { + DEBUG(dbgs() << "Allocator " << Id << " applied mappings:\n"); + for (auto &ObjAllocs : Unmapped) { + { + TargetAddress NextCodeAddr = ObjAllocs.RemoteCodeAddr; + for (auto &Alloc : ObjAllocs.CodeAllocs) { + NextCodeAddr = RoundUpToAlignment(NextCodeAddr, Alloc.getAlign()); + Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextCodeAddr); + DEBUG(dbgs() << " code: " + << static_cast<void *>(Alloc.getLocalAddress()) + << " -> " << format("0x%016x", NextCodeAddr) << "\n"); + Alloc.setRemoteAddress(NextCodeAddr); + NextCodeAddr += Alloc.getSize(); + } + } + { + TargetAddress NextRODataAddr = ObjAllocs.RemoteRODataAddr; + for (auto &Alloc : ObjAllocs.RODataAllocs) { + NextRODataAddr = + RoundUpToAlignment(NextRODataAddr, Alloc.getAlign()); + Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextRODataAddr); + DEBUG(dbgs() << " ro-data: " + << static_cast<void *>(Alloc.getLocalAddress()) + << " -> " << format("0x%016x", NextRODataAddr) + << "\n"); + Alloc.setRemoteAddress(NextRODataAddr); + NextRODataAddr += Alloc.getSize(); + } + } + { + TargetAddress NextRWDataAddr = ObjAllocs.RemoteRWDataAddr; + for (auto &Alloc : ObjAllocs.RWDataAllocs) { + NextRWDataAddr = + RoundUpToAlignment(NextRWDataAddr, Alloc.getAlign()); + Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextRWDataAddr); + DEBUG(dbgs() << " rw-data: " + << static_cast<void *>(Alloc.getLocalAddress()) + << " -> " << format("0x%016x", NextRWDataAddr) + << "\n"); + Alloc.setRemoteAddress(NextRWDataAddr); + NextRWDataAddr += Alloc.getSize(); + } + } + Unfinalized.push_back(std::move(ObjAllocs)); + } + Unmapped.clear(); + } + + bool finalizeMemory(std::string *ErrMsg = nullptr) override { + DEBUG(dbgs() << "Allocator " << Id << " finalizing:\n"); + + for (auto &ObjAllocs : Unfinalized) { + + for (auto &Alloc : ObjAllocs.CodeAllocs) { + DEBUG(dbgs() << " copying code: " + << static_cast<void *>(Alloc.getLocalAddress()) << " -> " + << format("0x%016x", Alloc.getRemoteAddress()) << " (" + << Alloc.getSize() << " bytes)\n"); + Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(), + Alloc.getSize()); + } + + if (ObjAllocs.RemoteCodeAddr) { + DEBUG(dbgs() << " setting R-X permissions on code block: " + << format("0x%016x", ObjAllocs.RemoteCodeAddr) << "\n"); + Client.setProtections(Id, ObjAllocs.RemoteCodeAddr, + sys::Memory::MF_READ | sys::Memory::MF_EXEC); + } + + for (auto &Alloc : ObjAllocs.RODataAllocs) { + DEBUG(dbgs() << " copying ro-data: " + << static_cast<void *>(Alloc.getLocalAddress()) << " -> " + << format("0x%016x", Alloc.getRemoteAddress()) << " (" + << Alloc.getSize() << " bytes)\n"); + Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(), + Alloc.getSize()); + } + + if (ObjAllocs.RemoteRODataAddr) { + DEBUG(dbgs() << " setting R-- permissions on ro-data block: " + << format("0x%016x", ObjAllocs.RemoteRODataAddr) + << "\n"); + Client.setProtections(Id, ObjAllocs.RemoteRODataAddr, + sys::Memory::MF_READ); + } + + for (auto &Alloc : 
ObjAllocs.RWDataAllocs) { + DEBUG(dbgs() << " copying rw-data: " + << static_cast<void *>(Alloc.getLocalAddress()) << " -> " + << format("0x%016x", Alloc.getRemoteAddress()) << " (" + << Alloc.getSize() << " bytes)\n"); + Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(), + Alloc.getSize()); + } + + if (ObjAllocs.RemoteRWDataAddr) { + DEBUG(dbgs() << " setting RW- permissions on rw-data block: " + << format("0x%016x", ObjAllocs.RemoteRWDataAddr) + << "\n"); + Client.setProtections(Id, ObjAllocs.RemoteRWDataAddr, + sys::Memory::MF_READ | sys::Memory::MF_WRITE); + } + } + Unfinalized.clear(); + + return false; + } + + private: + class Alloc { + public: + Alloc(uint64_t Size, unsigned Align) + : Size(Size), Align(Align), Contents(new char[Size + Align - 1]), + RemoteAddr(0) {} + + Alloc(Alloc &&Other) + : Size(std::move(Other.Size)), Align(std::move(Other.Align)), + Contents(std::move(Other.Contents)), + RemoteAddr(std::move(Other.RemoteAddr)) {} + + Alloc &operator=(Alloc &&Other) { + Size = std::move(Other.Size); + Align = std::move(Other.Align); + Contents = std::move(Other.Contents); + RemoteAddr = std::move(Other.RemoteAddr); + return *this; + } + + uint64_t getSize() const { return Size; } + + unsigned getAlign() const { return Align; } + + char *getLocalAddress() const { + uintptr_t LocalAddr = reinterpret_cast<uintptr_t>(Contents.get()); + LocalAddr = RoundUpToAlignment(LocalAddr, Align); + return reinterpret_cast<char *>(LocalAddr); + } + + void setRemoteAddress(TargetAddress RemoteAddr) { + this->RemoteAddr = RemoteAddr; + } + + TargetAddress getRemoteAddress() const { return RemoteAddr; } + + private: + uint64_t Size; + unsigned Align; + std::unique_ptr<char[]> Contents; + TargetAddress RemoteAddr; + }; + + struct ObjectAllocs { + ObjectAllocs() + : RemoteCodeAddr(0), RemoteRODataAddr(0), RemoteRWDataAddr(0) {} + + ObjectAllocs(ObjectAllocs &&Other) + : RemoteCodeAddr(std::move(Other.RemoteCodeAddr)), + RemoteRODataAddr(std::move(Other.RemoteRODataAddr)), + RemoteRWDataAddr(std::move(Other.RemoteRWDataAddr)), + CodeAllocs(std::move(Other.CodeAllocs)), + RODataAllocs(std::move(Other.RODataAllocs)), + RWDataAllocs(std::move(Other.RWDataAllocs)) {} + + ObjectAllocs &operator=(ObjectAllocs &&Other) { + RemoteCodeAddr = std::move(Other.RemoteCodeAddr); + RemoteRODataAddr = std::move(Other.RemoteRODataAddr); + RemoteRWDataAddr = std::move(Other.RemoteRWDataAddr); + CodeAllocs = std::move(Other.CodeAllocs); + RODataAllocs = std::move(Other.RODataAllocs); + RWDataAllocs = std::move(Other.RWDataAllocs); + return *this; + } + + TargetAddress RemoteCodeAddr; + TargetAddress RemoteRODataAddr; + TargetAddress RemoteRWDataAddr; + std::vector<Alloc> CodeAllocs, RODataAllocs, RWDataAllocs; + }; + + OrcRemoteTargetClient &Client; + ResourceIdMgr::ResourceId Id; + std::vector<ObjectAllocs> Unmapped; + std::vector<ObjectAllocs> Unfinalized; + }; + + /// Remote indirect stubs manager. 
+ class RCIndirectStubsManager : public IndirectStubsManager { + public: + RCIndirectStubsManager(OrcRemoteTargetClient &Remote, + ResourceIdMgr::ResourceId Id) + : Remote(Remote), Id(Id) {} + + ~RCIndirectStubsManager() { Remote.destroyIndirectStubsManager(Id); } + + std::error_code createStub(StringRef StubName, TargetAddress StubAddr, + JITSymbolFlags StubFlags) override { + if (auto EC = reserveStubs(1)) + return EC; + + return createStubInternal(StubName, StubAddr, StubFlags); + } + + std::error_code createStubs(const StubInitsMap &StubInits) override { + if (auto EC = reserveStubs(StubInits.size())) + return EC; + + for (auto &Entry : StubInits) + if (auto EC = createStubInternal(Entry.first(), Entry.second.first, + Entry.second.second)) + return EC; + + return std::error_code(); + } + + JITSymbol findStub(StringRef Name, bool ExportedStubsOnly) override { + auto I = StubIndexes.find(Name); + if (I == StubIndexes.end()) + return nullptr; + auto Key = I->second.first; + auto Flags = I->second.second; + auto StubSymbol = JITSymbol(getStubAddr(Key), Flags); + if (ExportedStubsOnly && !StubSymbol.isExported()) + return nullptr; + return StubSymbol; + } + + JITSymbol findPointer(StringRef Name) override { + auto I = StubIndexes.find(Name); + if (I == StubIndexes.end()) + return nullptr; + auto Key = I->second.first; + auto Flags = I->second.second; + return JITSymbol(getPtrAddr(Key), Flags); + } + + std::error_code updatePointer(StringRef Name, + TargetAddress NewAddr) override { + auto I = StubIndexes.find(Name); + assert(I != StubIndexes.end() && "No stub pointer for symbol"); + auto Key = I->second.first; + return Remote.writePointer(getPtrAddr(Key), NewAddr); + } + + private: + struct RemoteIndirectStubsInfo { + RemoteIndirectStubsInfo(TargetAddress StubBase, TargetAddress PtrBase, + unsigned NumStubs) + : StubBase(StubBase), PtrBase(PtrBase), NumStubs(NumStubs) {} + TargetAddress StubBase; + TargetAddress PtrBase; + unsigned NumStubs; + }; + + OrcRemoteTargetClient &Remote; + ResourceIdMgr::ResourceId Id; + std::vector<RemoteIndirectStubsInfo> RemoteIndirectStubsInfos; + typedef std::pair<uint16_t, uint16_t> StubKey; + std::vector<StubKey> FreeStubs; + StringMap<std::pair<StubKey, JITSymbolFlags>> StubIndexes; + + std::error_code reserveStubs(unsigned NumStubs) { + if (NumStubs <= FreeStubs.size()) + return std::error_code(); + + unsigned NewStubsRequired = NumStubs - FreeStubs.size(); + TargetAddress StubBase; + TargetAddress PtrBase; + unsigned NumStubsEmitted; + + Remote.emitIndirectStubs(StubBase, PtrBase, NumStubsEmitted, Id, + NewStubsRequired); + + unsigned NewBlockId = RemoteIndirectStubsInfos.size(); + RemoteIndirectStubsInfos.push_back( + RemoteIndirectStubsInfo(StubBase, PtrBase, NumStubsEmitted)); + + for (unsigned I = 0; I < NumStubsEmitted; ++I) + FreeStubs.push_back(std::make_pair(NewBlockId, I)); + + return std::error_code(); + } + + std::error_code createStubInternal(StringRef StubName, + TargetAddress InitAddr, + JITSymbolFlags StubFlags) { + auto Key = FreeStubs.back(); + FreeStubs.pop_back(); + StubIndexes[StubName] = std::make_pair(Key, StubFlags); + return Remote.writePointer(getPtrAddr(Key), InitAddr); + } + + TargetAddress getStubAddr(StubKey K) { + assert(RemoteIndirectStubsInfos[K.first].StubBase != 0 && + "Missing stub address"); + return RemoteIndirectStubsInfos[K.first].StubBase + + K.second * Remote.getIndirectStubSize(); + } + + TargetAddress getPtrAddr(StubKey K) { + assert(RemoteIndirectStubsInfos[K.first].PtrBase != 0 && + "Missing pointer address"); 
+ return RemoteIndirectStubsInfos[K.first].PtrBase + + K.second * Remote.getPointerSize(); + } + }; + + /// Remote compile callback manager. + class RCCompileCallbackManager : public JITCompileCallbackManager { + public: + RCCompileCallbackManager(TargetAddress ErrorHandlerAddress, + OrcRemoteTargetClient &Remote) + : JITCompileCallbackManager(ErrorHandlerAddress), Remote(Remote) { + assert(!Remote.CompileCallback && "Compile callback already set"); + Remote.CompileCallback = [this](TargetAddress TrampolineAddr) { + return executeCompileCallback(TrampolineAddr); + }; + Remote.emitResolverBlock(); + } + + private: + void grow() { + TargetAddress BlockAddr = 0; + uint32_t NumTrampolines = 0; + auto EC = Remote.emitTrampolineBlock(BlockAddr, NumTrampolines); + assert(!EC && "Failed to create trampolines"); + + uint32_t TrampolineSize = Remote.getTrampolineSize(); + for (unsigned I = 0; I < NumTrampolines; ++I) + this->AvailableTrampolines.push_back(BlockAddr + (I * TrampolineSize)); + } + + OrcRemoteTargetClient &Remote; + }; + + /// Create an OrcRemoteTargetClient. + /// Channel is the ChannelT instance to communicate on. It is assumed that + /// the channel is ready to be read from and written to. + static ErrorOr<OrcRemoteTargetClient> Create(ChannelT &Channel) { + std::error_code EC; + OrcRemoteTargetClient H(Channel, EC); + if (EC) + return EC; + return H; + } + + /// Call the int(void) function at the given address in the target and return + /// its result. + std::error_code callIntVoid(int &Result, TargetAddress Addr) { + DEBUG(dbgs() << "Calling int(*)(void) " << format("0x%016x", Addr) << "\n"); + + if (auto EC = call<CallIntVoid>(Channel, Addr)) + return EC; + + unsigned NextProcId; + if (auto EC = listenForCompileRequests(NextProcId)) + return EC; + + if (NextProcId != CallIntVoidResponseId) + return orcError(OrcErrorCode::UnexpectedRPCCall); + + return handle<CallIntVoidResponse>(Channel, [&](int R) { + Result = R; + DEBUG(dbgs() << "Result: " << R << "\n"); + return std::error_code(); + }); + } + + /// Call the int(int, char*[]) function at the given address in the target and + /// return its result. + std::error_code callMain(int &Result, TargetAddress Addr, + const std::vector<std::string> &Args) { + DEBUG(dbgs() << "Calling int(*)(int, char*[]) " << format("0x%016x", Addr) + << "\n"); + + if (auto EC = call<CallMain>(Channel, Addr, Args)) + return EC; + + unsigned NextProcId; + if (auto EC = listenForCompileRequests(NextProcId)) + return EC; + + if (NextProcId != CallMainResponseId) + return orcError(OrcErrorCode::UnexpectedRPCCall); + + return handle<CallMainResponse>(Channel, [&](int R) { + Result = R; + DEBUG(dbgs() << "Result: " << R << "\n"); + return std::error_code(); + }); + } + + /// Call the void() function at the given address in the target and wait for + /// it to finish. + std::error_code callVoidVoid(TargetAddress Addr) { + DEBUG(dbgs() << "Calling void(*)(void) " << format("0x%016x", Addr) + << "\n"); + + if (auto EC = call<CallVoidVoid>(Channel, Addr)) + return EC; + + unsigned NextProcId; + if (auto EC = listenForCompileRequests(NextProcId)) + return EC; + + if (NextProcId != CallVoidVoidResponseId) + return orcError(OrcErrorCode::UnexpectedRPCCall); + + return handle<CallVoidVoidResponse>(Channel, doNothing); + } + + /// Create an RCMemoryManager which will allocate its memory on the remote + /// target. 
+ std::error_code + createRemoteMemoryManager(std::unique_ptr<RCMemoryManager> &MM) { + assert(!MM && "MemoryManager should be null before creation."); + + auto Id = AllocatorIds.getNext(); + if (auto EC = call<CreateRemoteAllocator>(Channel, Id)) + return EC; + MM = llvm::make_unique<RCMemoryManager>(*this, Id); + return std::error_code(); + } + + /// Create an RCIndirectStubsManager that will allocate stubs on the remote + /// target. + std::error_code + createIndirectStubsManager(std::unique_ptr<RCIndirectStubsManager> &I) { + assert(!I && "Indirect stubs manager should be null before creation."); + auto Id = IndirectStubOwnerIds.getNext(); + if (auto EC = call<CreateIndirectStubsOwner>(Channel, Id)) + return EC; + I = llvm::make_unique<RCIndirectStubsManager>(*this, Id); + return std::error_code(); + } + + /// Search for symbols in the remote process. Note: This should be used by + /// symbol resolvers *after* they've searched the local symbol table in the + /// JIT stack. + std::error_code getSymbolAddress(TargetAddress &Addr, StringRef Name) { + // Check for an 'out-of-band' error, e.g. from an MM destructor. + if (ExistingError) + return ExistingError; + + // Request remote symbol address. + if (auto EC = call<GetSymbolAddress>(Channel, Name)) + return EC; + + return expect<GetSymbolAddressResponse>(Channel, [&](TargetAddress &A) { + Addr = A; + DEBUG(dbgs() << "Remote address lookup " << Name << " = " + << format("0x%016x", Addr) << "\n"); + return std::error_code(); + }); + } + + /// Get the triple for the remote target. + const std::string &getTargetTriple() const { return RemoteTargetTriple; } + + std::error_code terminateSession() { return call<TerminateSession>(Channel); } + +private: + OrcRemoteTargetClient(ChannelT &Channel, std::error_code &EC) + : Channel(Channel), RemotePointerSize(0), RemotePageSize(0), + RemoteTrampolineSize(0), RemoteIndirectStubSize(0) { + if ((EC = call<GetRemoteInfo>(Channel))) + return; + + EC = expect<GetRemoteInfoResponse>( + Channel, readArgs(RemoteTargetTriple, RemotePointerSize, RemotePageSize, + RemoteTrampolineSize, RemoteIndirectStubSize)); + } + + void destroyRemoteAllocator(ResourceIdMgr::ResourceId Id) { + if (auto EC = call<DestroyRemoteAllocator>(Channel, Id)) { + // FIXME: This will be triggered by a removeModuleSet call: Propagate + // error return up through that. + llvm_unreachable("Failed to destroy remote allocator."); + } + AllocatorIds.release(Id); + } + + std::error_code destroyIndirectStubsManager(ResourceIdMgr::ResourceId Id) { + IndirectStubOwnerIds.release(Id); + return call<DestroyIndirectStubsOwner>(Channel, Id); + } + + std::error_code emitIndirectStubs(TargetAddress &StubBase, + TargetAddress &PtrBase, + uint32_t &NumStubsEmitted, + ResourceIdMgr::ResourceId Id, + uint32_t NumStubsRequired) { + if (auto EC = call<EmitIndirectStubs>(Channel, Id, NumStubsRequired)) + return EC; + + return expect<EmitIndirectStubsResponse>( + Channel, readArgs(StubBase, PtrBase, NumStubsEmitted)); + } + + std::error_code emitResolverBlock() { + // Check for an 'out-of-band' error, e.g. from an MM destructor. + if (ExistingError) + return ExistingError; + + return call<EmitResolverBlock>(Channel); + } + + std::error_code emitTrampolineBlock(TargetAddress &BlockAddr, + uint32_t &NumTrampolines) { + // Check for an 'out-of-band' error, e.g. from an MM destructor.
+ if (ExistingError) + return ExistingError; + + if (auto EC = call<EmitTrampolineBlock>(Channel)) + return EC; + + return expect<EmitTrampolineBlockResponse>( + Channel, [&](TargetAddress BAddr, uint32_t NTrampolines) { + BlockAddr = BAddr; + NumTrampolines = NTrampolines; + return std::error_code(); + }); + } + + uint32_t getIndirectStubSize() const { return RemoteIndirectStubSize; } + uint32_t getPageSize() const { return RemotePageSize; } + uint32_t getPointerSize() const { return RemotePointerSize; } + + uint32_t getTrampolineSize() const { return RemoteTrampolineSize; } + + std::error_code listenForCompileRequests(uint32_t &NextId) { + // Check for an 'out-of-band' error, e.g. from an MM destructor. + if (ExistingError) + return ExistingError; + + if (auto EC = getNextProcId(Channel, NextId)) + return EC; + + while (NextId == RequestCompileId) { + TargetAddress TrampolineAddr = 0; + if (auto EC = handle<RequestCompile>(Channel, readArgs(TrampolineAddr))) + return EC; + + TargetAddress ImplAddr = CompileCallback(TrampolineAddr); + if (auto EC = call<RequestCompileResponse>(Channel, ImplAddr)) + return EC; + + if (auto EC = getNextProcId(Channel, NextId)) + return EC; + } + + return std::error_code(); + } + + std::error_code readMem(char *Dst, TargetAddress Src, uint64_t Size) { + // Check for an 'out-of-band' error, e.g. from an MM destructor. + if (ExistingError) + return ExistingError; + + if (auto EC = call<ReadMem>(Channel, Src, Size)) + return EC; + + if (auto EC = expect<ReadMemResponse>( + Channel, [&]() { return Channel.readBytes(Dst, Size); })) + return EC; + + return std::error_code(); + } + + std::error_code reserveMem(TargetAddress &RemoteAddr, + ResourceIdMgr::ResourceId Id, uint64_t Size, + uint32_t Align) { + + // Check for an 'out-of-band' error, e.g. from an MM destructor. + if (ExistingError) + return ExistingError; + + if (std::error_code EC = call<ReserveMem>(Channel, Id, Size, Align)) + return EC; + + return expect<ReserveMemResponse>(Channel, readArgs(RemoteAddr)); + } + + std::error_code setProtections(ResourceIdMgr::ResourceId Id, + TargetAddress RemoteSegAddr, + unsigned ProtFlags) { + return call<SetProtections>(Channel, Id, RemoteSegAddr, ProtFlags); + } + + std::error_code writeMem(TargetAddress Addr, const char *Src, uint64_t Size) { + // Check for an 'out-of-band' error, e.g. from an MM destructor. + if (ExistingError) + return ExistingError; + + // Make the send call. + if (auto EC = call<WriteMem>(Channel, Addr, Size)) + return EC; + + // Follow this up with the section contents. + if (auto EC = Channel.appendBytes(Src, Size)) + return EC; + + return Channel.send(); + } + + std::error_code writePointer(TargetAddress Addr, TargetAddress PtrVal) { + // Check for an 'out-of-band' error, e.g. from an MM destructor. 
+ if (ExistingError) + return ExistingError; + + return call<WritePtr>(Channel, Addr, PtrVal); + } + + static std::error_code doNothing() { return std::error_code(); } + + ChannelT &Channel; + std::error_code ExistingError; + std::string RemoteTargetTriple; + uint32_t RemotePointerSize; + uint32_t RemotePageSize; + uint32_t RemoteTrampolineSize; + uint32_t RemoteIndirectStubSize; + ResourceIdMgr AllocatorIds, IndirectStubOwnerIds; + std::function<TargetAddress(TargetAddress)> CompileCallback; +}; + +} // end namespace remote +} // end namespace orc +} // end namespace llvm + +#undef DEBUG_TYPE + +#endif diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h new file mode 100644 index 0000000..96dc242 --- /dev/null +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h @@ -0,0 +1,185 @@ +//===--- OrcRemoteTargetRPCAPI.h - Orc Remote-target RPC API ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Orc remote-target RPC API. It should not be used +// directly, but is used by the RemoteTargetClient and RemoteTargetServer +// classes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H +#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H + +#include "JITSymbol.h" +#include "RPCChannel.h" +#include "RPCUtils.h" + +namespace llvm { +namespace orc { +namespace remote { + +class OrcRemoteTargetRPCAPI : public RPC<RPCChannel> { +protected: + class ResourceIdMgr { + public: + typedef uint64_t ResourceId; + ResourceIdMgr() : NextId(0) {} + ResourceId getNext() { + if (!FreeIds.empty()) { + ResourceId I = FreeIds.back(); + FreeIds.pop_back(); + return I; + } + return NextId++; + } + void release(ResourceId I) { FreeIds.push_back(I); } + + private: + ResourceId NextId; + std::vector<ResourceId> FreeIds; + }; + +public: + enum JITProcId : uint32_t { + InvalidId = 0, + CallIntVoidId, + CallIntVoidResponseId, + CallMainId, + CallMainResponseId, + CallVoidVoidId, + CallVoidVoidResponseId, + CreateRemoteAllocatorId, + CreateIndirectStubsOwnerId, + DestroyRemoteAllocatorId, + DestroyIndirectStubsOwnerId, + EmitIndirectStubsId, + EmitIndirectStubsResponseId, + EmitResolverBlockId, + EmitTrampolineBlockId, + EmitTrampolineBlockResponseId, + GetSymbolAddressId, + GetSymbolAddressResponseId, + GetRemoteInfoId, + GetRemoteInfoResponseId, + ReadMemId, + ReadMemResponseId, + ReserveMemId, + ReserveMemResponseId, + RequestCompileId, + RequestCompileResponseId, + SetProtectionsId, + TerminateSessionId, + WriteMemId, + WritePtrId + }; + + static const char *getJITProcIdName(JITProcId Id); + + typedef Procedure<CallIntVoidId, TargetAddress /* FnAddr */> CallIntVoid; + + typedef Procedure<CallIntVoidResponseId, int /* Result */> + CallIntVoidResponse; + + typedef Procedure<CallMainId, TargetAddress /* FnAddr */, + std::vector<std::string> /* Args */> + CallMain; + + typedef Procedure<CallMainResponseId, int /* Result */> CallMainResponse; + + typedef Procedure<CallVoidVoidId, TargetAddress /* FnAddr */> CallVoidVoid; + + typedef Procedure<CallVoidVoidResponseId> CallVoidVoidResponse; + + typedef Procedure<CreateRemoteAllocatorId, + ResourceIdMgr::ResourceId /* 
Allocator ID */> + CreateRemoteAllocator; + + typedef Procedure<CreateIndirectStubsOwnerId, + ResourceIdMgr::ResourceId /* StubsOwner ID */> + CreateIndirectStubsOwner; + + typedef Procedure<DestroyRemoteAllocatorId, + ResourceIdMgr::ResourceId /* Allocator ID */> + DestroyRemoteAllocator; + + typedef Procedure<DestroyIndirectStubsOwnerId, + ResourceIdMgr::ResourceId /* StubsOwner ID */> + DestroyIndirectStubsOwner; + + typedef Procedure<EmitIndirectStubsId, + ResourceIdMgr::ResourceId /* StubsOwner ID */, + uint32_t /* NumStubsRequired */> + EmitIndirectStubs; + + typedef Procedure< + EmitIndirectStubsResponseId, TargetAddress /* StubsBaseAddr */, + TargetAddress /* PtrsBaseAddr */, uint32_t /* NumStubsEmitted */> + EmitIndirectStubsResponse; + + typedef Procedure<EmitResolverBlockId> EmitResolverBlock; + + typedef Procedure<EmitTrampolineBlockId> EmitTrampolineBlock; + + typedef Procedure<EmitTrampolineBlockResponseId, + TargetAddress /* BlockAddr */, + uint32_t /* NumTrampolines */> + EmitTrampolineBlockResponse; + + typedef Procedure<GetSymbolAddressId, std::string /*SymbolName*/> + GetSymbolAddress; + + typedef Procedure<GetSymbolAddressResponseId, uint64_t /* SymbolAddr */> + GetSymbolAddressResponse; + + typedef Procedure<GetRemoteInfoId> GetRemoteInfo; + + typedef Procedure<GetRemoteInfoResponseId, std::string /* Triple */, + uint32_t /* PointerSize */, uint32_t /* PageSize */, + uint32_t /* TrampolineSize */, + uint32_t /* IndirectStubSize */> + GetRemoteInfoResponse; + + typedef Procedure<ReadMemId, TargetAddress /* Src */, uint64_t /* Size */> + ReadMem; + + typedef Procedure<ReadMemResponseId> ReadMemResponse; + + typedef Procedure<ReserveMemId, ResourceIdMgr::ResourceId /* Id */, + uint64_t /* Size */, uint32_t /* Align */> + ReserveMem; + + typedef Procedure<ReserveMemResponseId, TargetAddress /* Addr */> + ReserveMemResponse; + + typedef Procedure<RequestCompileId, TargetAddress /* TrampolineAddr */> + RequestCompile; + + typedef Procedure<RequestCompileResponseId, TargetAddress /* ImplAddr */> + RequestCompileResponse; + + typedef Procedure<SetProtectionsId, ResourceIdMgr::ResourceId /* Id */, + TargetAddress /* Dst */, uint32_t /* ProtFlags */> + SetProtections; + + typedef Procedure<TerminateSessionId> TerminateSession; + + typedef Procedure<WriteMemId, TargetAddress /* Dst */, uint64_t /* Size */ + /* Data should follow */> + WriteMem; + + typedef Procedure<WritePtrId, TargetAddress /* Dst */, + TargetAddress /* Val */> + WritePtr; +}; + +} // end namespace remote +} // end namespace orc +} // end namespace llvm + +#endif diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h new file mode 100644 index 0000000..5247661 --- /dev/null +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h @@ -0,0 +1,432 @@ +//===---- OrcRemoteTargetServer.h - Orc Remote-target Server ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the OrcRemoteTargetServer class. It can be used to build a +// JIT server that can execute code sent from an OrcRemoteTargetClient. 
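The server side pairs with the client above: it blocks reading procedure ids off the channel and dispatches each one. A hedged sketch of a typical serve loop (assumed usage, not part of this patch; ChannelT stands for any concrete RPCChannel implementation, e.g. one wrapping a socket, which the patch leaves to the embedder):

  #include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
  #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h"
  #include "llvm/Support/DynamicLibrary.h"

  using namespace llvm;
  using namespace llvm::orc;
  using namespace llvm::orc::remote;

  // Serve requests on Channel until the client terminates the session or an
  // error occurs. Symbols are resolved in the server process itself.
  template <typename ChannelT>
  std::error_code serve(ChannelT &Channel) {
    OrcRemoteTargetServer<ChannelT, OrcX86_64> Server(
        Channel, [](const std::string &Name) -> TargetAddress {
          return reinterpret_cast<uintptr_t>(
              sys::DynamicLibrary::SearchForAddressOfSymbol(Name));
        });
    while (true) {
      OrcRemoteTargetRPCAPI::JITProcId Id = OrcRemoteTargetRPCAPI::InvalidId;
      if (auto EC = Server.getNextProcId(Id))
        return EC;
      if (Id == OrcRemoteTargetRPCAPI::TerminateSessionId)
        return std::error_code();
      if (auto EC = Server.handleKnownProcedure(Id))
        return EC;
    }
  }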
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H +#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H + +#include "OrcRemoteTargetRPCAPI.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/raw_ostream.h" +#include <map> + +#define DEBUG_TYPE "orc-remote" + +namespace llvm { +namespace orc { +namespace remote { + +template <typename ChannelT, typename TargetT> +class OrcRemoteTargetServer : public OrcRemoteTargetRPCAPI { +public: + typedef std::function<TargetAddress(const std::string &Name)> + SymbolLookupFtor; + + OrcRemoteTargetServer(ChannelT &Channel, SymbolLookupFtor SymbolLookup) + : Channel(Channel), SymbolLookup(std::move(SymbolLookup)) {} + + std::error_code getNextProcId(JITProcId &Id) { + return deserialize(Channel, Id); + } + + std::error_code handleKnownProcedure(JITProcId Id) { + typedef OrcRemoteTargetServer ThisT; + + DEBUG(dbgs() << "Handling known proc: " << getJITProcIdName(Id) << "\n"); + + switch (Id) { + case CallIntVoidId: + return handle<CallIntVoid>(Channel, *this, &ThisT::handleCallIntVoid); + case CallMainId: + return handle<CallMain>(Channel, *this, &ThisT::handleCallMain); + case CallVoidVoidId: + return handle<CallVoidVoid>(Channel, *this, &ThisT::handleCallVoidVoid); + case CreateRemoteAllocatorId: + return handle<CreateRemoteAllocator>(Channel, *this, + &ThisT::handleCreateRemoteAllocator); + case CreateIndirectStubsOwnerId: + return handle<CreateIndirectStubsOwner>( + Channel, *this, &ThisT::handleCreateIndirectStubsOwner); + case DestroyRemoteAllocatorId: + return handle<DestroyRemoteAllocator>( + Channel, *this, &ThisT::handleDestroyRemoteAllocator); + case DestroyIndirectStubsOwnerId: + return handle<DestroyIndirectStubsOwner>( + Channel, *this, &ThisT::handleDestroyIndirectStubsOwner); + case EmitIndirectStubsId: + return handle<EmitIndirectStubs>(Channel, *this, + &ThisT::handleEmitIndirectStubs); + case EmitResolverBlockId: + return handle<EmitResolverBlock>(Channel, *this, + &ThisT::handleEmitResolverBlock); + case EmitTrampolineBlockId: + return handle<EmitTrampolineBlock>(Channel, *this, + &ThisT::handleEmitTrampolineBlock); + case GetSymbolAddressId: + return handle<GetSymbolAddress>(Channel, *this, + &ThisT::handleGetSymbolAddress); + case GetRemoteInfoId: + return handle<GetRemoteInfo>(Channel, *this, &ThisT::handleGetRemoteInfo); + case ReadMemId: + return handle<ReadMem>(Channel, *this, &ThisT::handleReadMem); + case ReserveMemId: + return handle<ReserveMem>(Channel, *this, &ThisT::handleReserveMem); + case SetProtectionsId: + return handle<SetProtections>(Channel, *this, + &ThisT::handleSetProtections); + case WriteMemId: + return handle<WriteMem>(Channel, *this, &ThisT::handleWriteMem); + case WritePtrId: + return handle<WritePtr>(Channel, *this, &ThisT::handleWritePtr); + default: + return orcError(OrcErrorCode::UnexpectedRPCCall); + } + + llvm_unreachable("Unhandled JIT RPC procedure Id."); + } + + std::error_code requestCompile(TargetAddress &CompiledFnAddr, + TargetAddress TrampolineAddr) { + if (auto EC = call<RequestCompile>(Channel, TrampolineAddr)) + return EC; + + while (1) { + JITProcId Id = InvalidId; + if (auto EC = getNextProcId(Id)) + return EC; + + switch (Id) { + case RequestCompileResponseId: + return handle<RequestCompileResponse>(Channel, + readArgs(CompiledFnAddr)); + default: + if (auto EC = 
handleKnownProcedure(Id)) + return EC; + } + } + + llvm_unreachable("Fell through request-compile command loop."); + } + +private: + struct Allocator { + Allocator() = default; + Allocator(Allocator &&Other) : Allocs(std::move(Other.Allocs)) {} + Allocator &operator=(Allocator &&Other) { + Allocs = std::move(Other.Allocs); + return *this; + } + + ~Allocator() { + for (auto &Alloc : Allocs) + sys::Memory::releaseMappedMemory(Alloc.second); + } + + std::error_code allocate(void *&Addr, size_t Size, uint32_t Align) { + std::error_code EC; + sys::MemoryBlock MB = sys::Memory::allocateMappedMemory( + Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC); + if (EC) + return EC; + + Addr = MB.base(); + assert(Allocs.find(MB.base()) == Allocs.end() && "Duplicate alloc"); + Allocs[MB.base()] = std::move(MB); + return std::error_code(); + } + + std::error_code setProtections(void *block, unsigned Flags) { + auto I = Allocs.find(block); + if (I == Allocs.end()) + return orcError(OrcErrorCode::RemoteMProtectAddrUnrecognized); + return sys::Memory::protectMappedMemory(I->second, Flags); + } + + private: + std::map<void *, sys::MemoryBlock> Allocs; + }; + + static std::error_code doNothing() { return std::error_code(); } + + static TargetAddress reenter(void *JITTargetAddr, void *TrampolineAddr) { + TargetAddress CompiledFnAddr = 0; + + auto T = static_cast<OrcRemoteTargetServer *>(JITTargetAddr); + auto EC = T->requestCompile( + CompiledFnAddr, static_cast<TargetAddress>( + reinterpret_cast<uintptr_t>(TrampolineAddr))); + assert(!EC && "Compile request failed"); + (void)EC; + return CompiledFnAddr; + } + + std::error_code handleCallIntVoid(TargetAddress Addr) { + typedef int (*IntVoidFnTy)(); + IntVoidFnTy Fn = + reinterpret_cast<IntVoidFnTy>(static_cast<uintptr_t>(Addr)); + + DEBUG(dbgs() << " Calling " + << reinterpret_cast<void *>(reinterpret_cast<intptr_t>(Fn)) + << "\n"); + int Result = Fn(); + DEBUG(dbgs() << " Result = " << Result << "\n"); + + return call<CallIntVoidResponse>(Channel, Result); + } + + std::error_code handleCallMain(TargetAddress Addr, + std::vector<std::string> Args) { + typedef int (*MainFnTy)(int, const char *[]); + + MainFnTy Fn = reinterpret_cast<MainFnTy>(static_cast<uintptr_t>(Addr)); + int ArgC = Args.size() + 1; + int Idx = 1; + std::unique_ptr<const char *[]> ArgV(new const char *[ArgC + 1]); + ArgV[0] = "<jit process>"; + for (auto &Arg : Args) + ArgV[Idx++] = Arg.c_str(); + + DEBUG(dbgs() << " Calling " << reinterpret_cast<void *>(Fn) << "\n"); + int Result = Fn(ArgC, ArgV.get()); + DEBUG(dbgs() << " Result = " << Result << "\n"); + + return call<CallMainResponse>(Channel, Result); + } + + std::error_code handleCallVoidVoid(TargetAddress Addr) { + typedef void (*VoidVoidFnTy)(); + VoidVoidFnTy Fn = + reinterpret_cast<VoidVoidFnTy>(static_cast<uintptr_t>(Addr)); + + DEBUG(dbgs() << " Calling " << reinterpret_cast<void *>(Fn) << "\n"); + Fn(); + DEBUG(dbgs() << " Complete.\n"); + + return call<CallVoidVoidResponse>(Channel); + } + + std::error_code handleCreateRemoteAllocator(ResourceIdMgr::ResourceId Id) { + auto I = Allocators.find(Id); + if (I != Allocators.end()) + return orcError(OrcErrorCode::RemoteAllocatorIdAlreadyInUse); + DEBUG(dbgs() << " Created allocator " << Id << "\n"); + Allocators[Id] = Allocator(); + return std::error_code(); + } + + std::error_code handleCreateIndirectStubsOwner(ResourceIdMgr::ResourceId Id) { + auto I = IndirectStubsOwners.find(Id); + if (I != IndirectStubsOwners.end()) + return 
orcError(OrcErrorCode::RemoteIndirectStubsOwnerIdAlreadyInUse); + DEBUG(dbgs() << " Create indirect stubs owner " << Id << "\n"); + IndirectStubsOwners[Id] = ISBlockOwnerList(); + return std::error_code(); + } + + std::error_code handleDestroyRemoteAllocator(ResourceIdMgr::ResourceId Id) { + auto I = Allocators.find(Id); + if (I == Allocators.end()) + return orcError(OrcErrorCode::RemoteAllocatorDoesNotExist); + Allocators.erase(I); + DEBUG(dbgs() << " Destroyed allocator " << Id << "\n"); + return std::error_code(); + } + + std::error_code + handleDestroyIndirectStubsOwner(ResourceIdMgr::ResourceId Id) { + auto I = IndirectStubsOwners.find(Id); + if (I == IndirectStubsOwners.end()) + return orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist); + IndirectStubsOwners.erase(I); + return std::error_code(); + } + + std::error_code handleEmitIndirectStubs(ResourceIdMgr::ResourceId Id, + uint32_t NumStubsRequired) { + DEBUG(dbgs() << " ISMgr " << Id << " request " << NumStubsRequired + << " stubs.\n"); + + auto StubOwnerItr = IndirectStubsOwners.find(Id); + if (StubOwnerItr == IndirectStubsOwners.end()) + return orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist); + + typename TargetT::IndirectStubsInfo IS; + if (auto EC = + TargetT::emitIndirectStubsBlock(IS, NumStubsRequired, nullptr)) + return EC; + + TargetAddress StubsBase = + static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(IS.getStub(0))); + TargetAddress PtrsBase = + static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(IS.getPtr(0))); + uint32_t NumStubsEmitted = IS.getNumStubs(); + + auto &BlockList = StubOwnerItr->second; + BlockList.push_back(std::move(IS)); + + return call<EmitIndirectStubsResponse>(Channel, StubsBase, PtrsBase, + NumStubsEmitted); + } + + std::error_code handleEmitResolverBlock() { + std::error_code EC; + ResolverBlock = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( + TargetT::ResolverCodeSize, nullptr, + sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); + if (EC) + return EC; + + TargetT::writeResolverCode(static_cast<uint8_t *>(ResolverBlock.base()), + &reenter, this); + + return sys::Memory::protectMappedMemory(ResolverBlock.getMemoryBlock(), + sys::Memory::MF_READ | + sys::Memory::MF_EXEC); + } + + std::error_code handleEmitTrampolineBlock() { + std::error_code EC; + auto TrampolineBlock = + sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( + sys::Process::getPageSize(), nullptr, + sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); + if (EC) + return EC; + + unsigned NumTrampolines = + (sys::Process::getPageSize() - TargetT::PointerSize) / + TargetT::TrampolineSize; + + uint8_t *TrampolineMem = static_cast<uint8_t *>(TrampolineBlock.base()); + TargetT::writeTrampolines(TrampolineMem, ResolverBlock.base(), + NumTrampolines); + + EC = sys::Memory::protectMappedMemory(TrampolineBlock.getMemoryBlock(), + sys::Memory::MF_READ | + sys::Memory::MF_EXEC); + + TrampolineBlocks.push_back(std::move(TrampolineBlock)); + + return call<EmitTrampolineBlockResponse>( + Channel, + static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(TrampolineMem)), + NumTrampolines); + } + + std::error_code handleGetSymbolAddress(const std::string &Name) { + TargetAddress Addr = SymbolLookup(Name); + DEBUG(dbgs() << " Symbol '" << Name << "' = " << format("0x%016x", Addr) + << "\n"); + return call<GetSymbolAddressResponse>(Channel, Addr); + } + + std::error_code handleGetRemoteInfo() { + std::string ProcessTriple = sys::getProcessTriple(); + uint32_t PointerSize = TargetT::PointerSize; + 
uint32_t PageSize = sys::Process::getPageSize(); + uint32_t TrampolineSize = TargetT::TrampolineSize; + uint32_t IndirectStubSize = TargetT::IndirectStubsInfo::StubSize; + DEBUG(dbgs() << " Remote info:\n" + << " triple = '" << ProcessTriple << "'\n" + << " pointer size = " << PointerSize << "\n" + << " page size = " << PageSize << "\n" + << " trampoline size = " << TrampolineSize << "\n" + << " indirect stub size = " << IndirectStubSize << "\n"); + return call<GetRemoteInfoResponse>(Channel, ProcessTriple, PointerSize, + PageSize, TrampolineSize, + IndirectStubSize); + } + + std::error_code handleReadMem(TargetAddress RSrc, uint64_t Size) { + char *Src = reinterpret_cast<char *>(static_cast<uintptr_t>(RSrc)); + + DEBUG(dbgs() << " Reading " << Size << " bytes from " + << static_cast<void *>(Src) << "\n"); + + if (auto EC = call<ReadMemResponse>(Channel)) + return EC; + + if (auto EC = Channel.appendBytes(Src, Size)) + return EC; + + return Channel.send(); + } + + std::error_code handleReserveMem(ResourceIdMgr::ResourceId Id, uint64_t Size, + uint32_t Align) { + auto I = Allocators.find(Id); + if (I == Allocators.end()) + return orcError(OrcErrorCode::RemoteAllocatorDoesNotExist); + auto &Allocator = I->second; + void *LocalAllocAddr = nullptr; + if (auto EC = Allocator.allocate(LocalAllocAddr, Size, Align)) + return EC; + + DEBUG(dbgs() << " Allocator " << Id << " reserved " << LocalAllocAddr + << " (" << Size << " bytes, alignment " << Align << ")\n"); + + TargetAddress AllocAddr = + static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(LocalAllocAddr)); + + return call<ReserveMemResponse>(Channel, AllocAddr); + } + + std::error_code handleSetProtections(ResourceIdMgr::ResourceId Id, + TargetAddress Addr, uint32_t Flags) { + auto I = Allocators.find(Id); + if (I == Allocators.end()) + return orcError(OrcErrorCode::RemoteAllocatorDoesNotExist); + auto &Allocator = I->second; + void *LocalAddr = reinterpret_cast<void *>(static_cast<uintptr_t>(Addr)); + DEBUG(dbgs() << " Allocator " << Id << " set permissions on " << LocalAddr + << " to " << (Flags & sys::Memory::MF_READ ? 'R' : '-') + << (Flags & sys::Memory::MF_WRITE ? 'W' : '-') + << (Flags & sys::Memory::MF_EXEC ? 
'X' : '-') << "\n"); + return Allocator.setProtections(LocalAddr, Flags); + } + + std::error_code handleWriteMem(TargetAddress RDst, uint64_t Size) { + char *Dst = reinterpret_cast<char *>(static_cast<uintptr_t>(RDst)); + DEBUG(dbgs() << " Writing " << Size << " bytes to " + << format("0x%016x", RDst) << "\n"); + return Channel.readBytes(Dst, Size); + } + + std::error_code handleWritePtr(TargetAddress Addr, TargetAddress PtrVal) { + DEBUG(dbgs() << " Writing pointer *" << format("0x%016x", Addr) << " = " + << format("0x%016x", PtrVal) << "\n"); + uintptr_t *Ptr = + reinterpret_cast<uintptr_t *>(static_cast<uintptr_t>(Addr)); + *Ptr = static_cast<uintptr_t>(PtrVal); + return std::error_code(); + } + + ChannelT &Channel; + SymbolLookupFtor SymbolLookup; + std::map<ResourceIdMgr::ResourceId, Allocator> Allocators; + typedef std::vector<typename TargetT::IndirectStubsInfo> ISBlockOwnerList; + std::map<ResourceIdMgr::ResourceId, ISBlockOwnerList> IndirectStubsOwners; + sys::OwningMemoryBlock ResolverBlock; + std::vector<sys::OwningMemoryBlock> TrampolineBlocks; +}; + +} // end namespace remote +} // end namespace orc +} // end namespace llvm + +#undef DEBUG_TYPE + +#endif diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCChannel.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCChannel.h new file mode 100644 index 0000000..b97b6da --- /dev/null +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCChannel.h @@ -0,0 +1,179 @@ +// -*- c++ -*- + +#ifndef LLVM_EXECUTIONENGINE_ORC_RPCCHANNEL_H +#define LLVM_EXECUTIONENGINE_ORC_RPCCHANNEL_H + +#include "OrcError.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Endian.h" + +#include <system_error> + +namespace llvm { +namespace orc { +namespace remote { + +/// Interface for byte-streams to be used with RPC. +class RPCChannel { +public: + virtual ~RPCChannel() {} + + /// Read Size bytes from the stream into *Dst. + virtual std::error_code readBytes(char *Dst, unsigned Size) = 0; + + /// Read size bytes from *Src and append them to the stream. + virtual std::error_code appendBytes(const char *Src, unsigned Size) = 0; + + /// Flush the stream if possible. + virtual std::error_code send() = 0; +}; + +/// RPC channel serialization for a variadic list of arguments. +template <typename T, typename... Ts> +std::error_code serialize_seq(RPCChannel &C, const T &Arg, const Ts &... Args) { + if (auto EC = serialize(C, Arg)) + return EC; + return serialize_seq(C, Args...); +} + +/// RPC channel serialization for an (empty) variadic list of arguments. +inline std::error_code serialize_seq(RPCChannel &C) { + return std::error_code(); +} + +/// RPC channel deserialization for a variadic list of arguments. +template <typename T, typename... Ts> +std::error_code deserialize_seq(RPCChannel &C, T &Arg, Ts &... Args) { + if (auto EC = deserialize(C, Arg)) + return EC; + return deserialize_seq(C, Args...); +} + +/// RPC channel deserialization for an (empty) variadic list of arguments. +inline std::error_code deserialize_seq(RPCChannel &C) { + return std::error_code(); +} + +/// RPC channel serialization for integer primitives.
+template <typename T> +typename std::enable_if< + std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value || + std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value || + std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value || + std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, + std::error_code>::type +serialize(RPCChannel &C, T V) { + support::endian::byte_swap<T, support::big>(V); + return C.appendBytes(reinterpret_cast<const char *>(&V), sizeof(T)); +} + +/// RPC channel deserialization for integer primitives. +template <typename T> +typename std::enable_if< + std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value || + std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value || + std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value || + std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, + std::error_code>::type +deserialize(RPCChannel &C, T &V) { + if (auto EC = C.readBytes(reinterpret_cast<char *>(&V), sizeof(T))) + return EC; + support::endian::byte_swap<T, support::big>(V); + return std::error_code(); +} + +/// RPC channel serialization for enums. +template <typename T> +typename std::enable_if<std::is_enum<T>::value, std::error_code>::type +serialize(RPCChannel &C, T V) { + return serialize(C, static_cast<typename std::underlying_type<T>::type>(V)); +} + +/// RPC channel deserialization for enums. +template <typename T> +typename std::enable_if<std::is_enum<T>::value, std::error_code>::type +deserialize(RPCChannel &C, T &V) { + typename std::underlying_type<T>::type Tmp; + std::error_code EC = deserialize(C, Tmp); + V = static_cast<T>(Tmp); + return EC; +} + +/// RPC channel serialization for bools. +inline std::error_code serialize(RPCChannel &C, bool V) { + uint8_t VN = V ? 1 : 0; + return C.appendBytes(reinterpret_cast<const char *>(&VN), 1); +} + +/// RPC channel deserialization for bools. +inline std::error_code deserialize(RPCChannel &C, bool &V) { + uint8_t VN = 0; + if (auto EC = C.readBytes(reinterpret_cast<char *>(&VN), 1)) + return EC; + + V = (VN != 0) ? true : false; + return std::error_code(); +} + +/// RPC channel serialization for StringRefs. +/// Note: There is no corresponding deserialization for this, as StringRef +/// doesn't own its memory and so can't hold the deserialized data. +inline std::error_code serialize(RPCChannel &C, StringRef S) { + if (auto EC = serialize(C, static_cast<uint64_t>(S.size()))) + return EC; + return C.appendBytes((const char *)S.bytes_begin(), S.size()); +} + +/// RPC channel serialization for std::strings. +inline std::error_code serialize(RPCChannel &C, const std::string &S) { + return serialize(C, StringRef(S)); +} + +/// RPC channel deserialization for std::strings. +inline std::error_code deserialize(RPCChannel &C, std::string &S) { + uint64_t Count; + if (auto EC = deserialize(C, Count)) + return EC; + S.resize(Count); + return C.readBytes(&S[0], Count); } + +/// RPC channel serialization for ArrayRef<T>. +template <typename T> +std::error_code serialize(RPCChannel &C, const ArrayRef<T> &A) { + if (auto EC = serialize(C, static_cast<uint64_t>(A.size()))) + return EC; + + for (const auto &E : A) + if (auto EC = serialize(C, E)) + return EC; + + return std::error_code(); +} + +/// RPC channel serialization for std::vector<T>. +template <typename T> +std::error_code serialize(RPCChannel &C, const std::vector<T> &V) { + return serialize(C, ArrayRef<T>(V)); +} + +/// RPC channel deserialization for std::vector<T>.
+template <typename T> +std::error_code deserialize(RPCChannel &C, std::vector<T> &V) { + uint64_t Count = 0; + if (auto EC = deserialize(C, Count)) + return EC; + + V.resize(Count); + for (auto &E : V) + if (auto EC = deserialize(C, E)) + return EC; + + return std::error_code(); +} + +} // end namespace remote +} // end namespace orc +} // end namespace llvm + +#endif
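Taken together, the channel interface and the overloads above are all that is needed to move typed data: integers travel big-endian, and strings/vectors are length-prefixed with a uint64_t count. A minimal sketch of how they compose, assuming this header is on the include path; QueueChannel and the test driver are illustrative names, not part of this commit:

#include "llvm/ExecutionEngine/Orc/RPCChannel.h"
#include <algorithm>
#include <deque>
#include <string>

using namespace llvm::orc::remote;

// Hypothetical in-memory channel: appendBytes queues bytes in a FIFO,
// readBytes dequeues them, and send() is a no-op (no real transport).
class QueueChannel : public RPCChannel {
public:
  std::error_code readBytes(char *Dst, unsigned Size) override {
    if (Buffer.size() < Size)
      return std::make_error_code(std::errc::io_error); // underflow
    std::copy(Buffer.begin(), Buffer.begin() + Size, Dst);
    Buffer.erase(Buffer.begin(), Buffer.begin() + Size);
    return std::error_code();
  }
  std::error_code appendBytes(const char *Src, unsigned Size) override {
    Buffer.insert(Buffer.end(), Src, Src + Size);
    return std::error_code();
  }
  std::error_code send() override { return std::error_code(); }

private:
  std::deque<char> Buffer;
};

int main() {
  QueueChannel C;
  // Writer side: a big-endian u32 followed by a length-prefixed string.
  if (serialize(C, uint32_t(42)) || serialize(C, std::string("hello")))
    return 1;
  // Reader side sees the same wire format.
  uint32_t N;
  std::string S;
  if (deserialize(C, N) || deserialize(C, S))
    return 1;
  return (N == 42 && S == "hello") ? 0 : 1;
}

The same pattern scales to any transport (pipe, socket) by reimplementing the three virtual methods; everything above them is transport-agnostic.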
 diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h new file mode 100644 index 0000000..0bd5cbc --- /dev/null +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h @@ -0,0 +1,266 @@ +//===----- RPCUtils.h - Basic utilities for building RPC APIs ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Basic utilities for building RPC APIs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_RPCUTILS_H +#define LLVM_EXECUTIONENGINE_ORC_RPCUTILS_H + +#include "llvm/ADT/STLExtras.h" + +namespace llvm { +namespace orc { +namespace remote { + +// Base class containing utilities that require partial specialization. +// These cannot be included in RPC, as template class members cannot be +// partially specialized. +class RPCBase { +protected: + template <typename ProcedureIdT, ProcedureIdT ProcId, typename... Ts> + class ProcedureHelper { + public: + static const ProcedureIdT Id = ProcId; + }; + + template <typename ChannelT, typename Proc> class CallHelper; + + template <typename ChannelT, typename ProcedureIdT, ProcedureIdT ProcId, + typename... ArgTs> + class CallHelper<ChannelT, ProcedureHelper<ProcedureIdT, ProcId, ArgTs...>> { + public: + static std::error_code call(ChannelT &C, const ArgTs &... Args) { + if (auto EC = serialize(C, ProcId)) + return EC; + // If you see a compile-error on this line you're probably calling a + // function with the wrong signature. + return serialize_seq(C, Args...); + } + }; + + template <typename ChannelT, typename Proc> class HandlerHelper; + + template <typename ChannelT, typename ProcedureIdT, ProcedureIdT ProcId, + typename... ArgTs> + class HandlerHelper<ChannelT, + ProcedureHelper<ProcedureIdT, ProcId, ArgTs...>> { + public: + template <typename HandlerT> + static std::error_code handle(ChannelT &C, HandlerT Handler) { + return readAndHandle(C, Handler, llvm::index_sequence_for<ArgTs...>()); + } + + private: + template <typename HandlerT, size_t... Is> + static std::error_code readAndHandle(ChannelT &C, HandlerT Handler, + llvm::index_sequence<Is...> _) { + std::tuple<ArgTs...> RPCArgs; + if (auto EC = deserialize_seq(C, std::get<Is>(RPCArgs)...)) + return EC; + return Handler(std::get<Is>(RPCArgs)...); + } + }; + + template <typename ClassT, typename... ArgTs> class MemberFnWrapper { + public: + typedef std::error_code (ClassT::*MethodT)(ArgTs...); + MemberFnWrapper(ClassT &Instance, MethodT Method) + : Instance(Instance), Method(Method) {} + std::error_code operator()(ArgTs &... Args) { + return (Instance.*Method)(Args...); + } + + private: + ClassT &Instance; + MethodT Method; + }; + + template <typename... ArgTs> class ReadArgs { + public: + std::error_code operator()() { return std::error_code(); } + }; + + template <typename ArgT, typename... ArgTs> + class ReadArgs<ArgT, ArgTs...> : public ReadArgs<ArgTs...> { + public: + ReadArgs(ArgT &Arg, ArgTs &... Args) + : ReadArgs<ArgTs...>(Args...), Arg(Arg) {} + + std::error_code operator()(ArgT &ArgVal, ArgTs &... ArgVals) { + this->Arg = std::move(ArgVal); + return ReadArgs<ArgTs...>::operator()(ArgVals...); + } + + private: + ArgT &Arg; + }; +}; + +/// Contains primitive utilities for defining, calling and handling calls to +/// remote procedures. ChannelT is a bidirectional stream conforming to the +/// RPCChannel interface (see RPCChannel.h), and ProcedureIdT is a procedure +/// identifier type that must be serializable on ChannelT. +/// +/// These utilities support the construction of very primitive RPC utilities. +/// Their intent is to ensure correct serialization and deserialization of +/// procedure arguments, and to keep the client and server's view of the API in +/// sync. +/// +/// These utilities do not support return values. These can be handled by +/// declaring a corresponding '.*Response' procedure and expecting it after a +/// call. They also do not support versioning: the client and server *must* be +/// compiled with the same procedure definitions. +/// +/// +/// +/// Overview (see comments on individual types/methods for details): +/// +/// Procedure<Id, Args...> : +/// +/// Associates a unique serializable id with an argument list. +/// +/// +/// call<Proc>(Channel, Args...) : +/// +/// Calls the remote procedure 'Proc' by serializing Proc's id followed by its +/// arguments and sending the resulting bytes to 'Channel'. +/// +/// +/// handle<Proc>(Channel, <functor matching std::error_code(Args...)>) : +/// +/// Handles a call to 'Proc' by deserializing its arguments and calling the +/// given functor. This assumes that the id for 'Proc' has already been +/// deserialized. +/// +/// expect<Proc>(Channel, <functor matching std::error_code(Args...)>) : +/// +/// The same as 'handle', except that the procedure id should not have been +/// read yet. Expect will deserialize the id and assert that it matches Proc's +/// id. If it does not, an unexpected RPC call error is returned. + +template <typename ChannelT, typename ProcedureIdT = uint32_t> +class RPC : public RPCBase { +public: + /// Utility class for defining/referring to RPC procedures. + /// + /// Typedefs of this utility are used when calling/handling remote procedures. + /// + /// ProcId should be a unique value of ProcedureIdT (i.e. not used with any + /// other Procedure typedef in the RPC API being defined). + /// + /// The template argument Ts... gives the argument list for the remote + /// procedure. + /// + /// E.g. + /// + /// typedef Procedure<0, bool> Proc1; + /// typedef Procedure<1, std::string, std::vector<int>> Proc2; + /// + /// if (auto EC = call<Proc1>(Channel, true)) + /// /* handle EC */; + /// + /// if (auto EC = expect<Proc2>(Channel, + /// [](std::string &S, std::vector<int> &V) { + /// // Stuff. + /// return std::error_code(); + /// })) + /// /* handle EC */; + /// + template <ProcedureIdT ProcId, typename... Ts> + using Procedure = ProcedureHelper<ProcedureIdT, ProcId, Ts...>; + + /// Serialize Args... to channel C, but do not call C.send(). + /// + /// For buffered channels, this can be used to queue up several calls before + /// flushing the channel. + template <typename Proc, typename... ArgTs> + static std::error_code appendCall(ChannelT &C, const ArgTs &... Args) { + return CallHelper<ChannelT, Proc>::call(C, Args...); + } + + /// Serialize Args... to channel C and call C.send().
+ template <typename Proc, typename... ArgTs> + static std::error_code call(ChannelT &C, const ArgTs &... Args) { + if (auto EC = appendCall<Proc>(C, Args...)) + return EC; + return C.send(); + } + + /// Deserialize the id of the next procedure call from C. + static std::error_code getNextProcId(ChannelT &C, ProcedureIdT &Id) { + return deserialize(C, Id); + } + + /// Deserialize args for Proc from C and call Handler. The signature of + /// handler must conform to 'std::error_code(Args...)' where Args... matches + /// the arguments used in the Proc typedef. + template <typename Proc, typename HandlerT> + static std::error_code handle(ChannelT &C, HandlerT Handler) { + return HandlerHelper<ChannelT, Proc>::handle(C, Handler); + } + + /// Helper version of 'handle' for calling member functions. + template <typename Proc, typename ClassT, typename... ArgTs> + static std::error_code + handle(ChannelT &C, ClassT &Instance, + std::error_code (ClassT::*HandlerMethod)(ArgTs...)) { + return handle<Proc>( + C, MemberFnWrapper<ClassT, ArgTs...>(Instance, HandlerMethod)); + } + + /// Deserialize a ProcedureIdT from C and verify it matches the id for Proc. + /// If the id does match, deserialize the arguments and call the handler + /// (similarly to handle). + /// If the id does not match, return an unexpected RPC call error and do not + /// deserialize any further bytes. + template <typename Proc, typename HandlerT> + static std::error_code expect(ChannelT &C, HandlerT Handler) { + ProcedureIdT ProcId; + if (auto EC = getNextProcId(C, ProcId)) + return EC; + if (ProcId != Proc::Id) + return orcError(OrcErrorCode::UnexpectedRPCCall); + return handle<Proc>(C, Handler); + } + + /// Helper version of expect for calling member functions. + template <typename Proc, typename ClassT, typename... ArgTs> + static std::error_code + expect(ChannelT &C, ClassT &Instance, + std::error_code (ClassT::*HandlerMethod)(ArgTs...)) { + return expect<Proc>( + C, MemberFnWrapper<ClassT, ArgTs...>(Instance, HandlerMethod)); + } + + /// Helper for handling setter procedures - this method returns a functor that + /// sets the variables referred to by Args... to values deserialized from the + /// channel. + /// E.g. + /// + /// typedef Procedure<0, bool, int> Proc1; + /// + /// ... + /// bool B; + /// int I; + /// if (auto EC = expect<Proc1>(Channel, readArgs(B, I))) + /// /* Handle Args */ ; + /// + template <typename... ArgTs> + static ReadArgs<ArgTs...> readArgs(ArgTs &... Args) { + return ReadArgs<ArgTs...>(Args...); + } +}; + +} // end namespace remote +} // end namespace orc +} // end namespace llvm + +#endif diff --git a/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h b/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h index 207bad0..c500696 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h @@ -30,6 +30,10 @@ class ExecutionEngine; class MCJITMemoryManager : public RuntimeDyld::MemoryManager { public: + + // Don't hide the notifyObjectLoaded method from RuntimeDyld::MemoryManager. + using RuntimeDyld::MemoryManager::notifyObjectLoaded; + /// This method is called after an object has been loaded into memory but /// before relocations are applied to the loaded sections.
The object load /// may have been initiated by MCJIT to resolve an external symbol for another diff --git a/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h b/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h index 385b8d0..100e97b 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -95,7 +95,9 @@ public: /// \brief Memory Management. class MemoryManager { + friend class RuntimeDyld; public: + MemoryManager() : FinalizationLocked(false) {} virtual ~MemoryManager() {} /// Allocate a memory block of (at least) the given size suitable for @@ -122,9 +124,11 @@ public: /// /// Note that by default the callback is disabled. To enable it /// redefine the method needsToReserveAllocationSpace to return true. - virtual void reserveAllocationSpace(uintptr_t CodeSize, - uintptr_t DataSizeRO, - uintptr_t DataSizeRW) {} + virtual void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, + uintptr_t RODataSize, + uint32_t RODataAlign, + uintptr_t RWDataSize, + uint32_t RWDataAlign) {} /// Override to return true to enable the reserveAllocationSpace callback. virtual bool needsToReserveAllocationSpace() { return false; } @@ -151,8 +155,23 @@ public: /// Returns true if an error occurred, false otherwise. virtual bool finalizeMemory(std::string *ErrMsg = nullptr) = 0; + /// This method is called after an object has been loaded into memory but + /// before relocations are applied to the loaded sections. + /// + /// Memory managers which are preparing code for execution in an external + /// address space can use this call to remap the section addresses for the + /// newly loaded object. + /// + /// For clients that do not need access to an ExecutionEngine instance this + /// method should be preferred to its cousin + /// MCJITMemoryManager::notifyObjectLoaded as this method is compatible with + /// ORC JIT stacks. + virtual void notifyObjectLoaded(RuntimeDyld &RTDyld, + const object::ObjectFile &Obj) {} + private: virtual void anchor(); + bool FinalizationLocked; }; /// \brief Symbol resolution. @@ -241,6 +260,25 @@ public: this->ProcessAllSections = ProcessAllSections; } + /// Perform all actions needed to make the code owned by this RuntimeDyld + /// instance executable: + /// + /// 1) Apply relocations. + /// 2) Register EH frames. + /// 3) Update memory permissions*. + /// + /// * Finalization is potentially recursive**, and the 3rd step will only be + /// applied by the outermost call to finalize. This allows different + /// RuntimeDyld instances to share a memory manager without the innermost + /// finalization locking the memory and causing relocation fixup errors in + /// outer instances. + /// + /// ** Recursive finalization occurs when one RuntimeDyld instance needs the + /// address of a symbol owned by some other instance in order to apply + /// relocations. + /// + void finalizeWithMemoryManagerLocking(); + private: // RuntimeDyldImpl is the actual class. RuntimeDyld is just the public // interface.
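The new reserveAllocationSpace signature carries per-segment alignments, and notifyObjectLoaded now lives on RuntimeDyld::MemoryManager itself, so ORC stacks can remap sections without an ExecutionEngine. A hedged sketch of a client manager using these hooks, assuming SectionMemoryManager is available; RemoteMapMemoryManager and the comment bodies are illustrative, not part of the commit:

#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"

using namespace llvm;

// Hypothetical manager preparing code to run in an external address space.
class RemoteMapMemoryManager : public SectionMemoryManager {
public:
  bool needsToReserveAllocationSpace() override { return true; }

  // New signature: sizes now arrive with per-segment alignments, so a
  // single up-front reservation can be laid out exactly.
  void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
                              uintptr_t RODataSize, uint32_t RODataAlign,
                              uintptr_t RWDataSize, uint32_t RWDataAlign) override {
    // Reserve one contiguous block large enough for all three segments
    // at their required alignments (sketch only).
  }

  // ORC-compatible hook: fires after load, before relocation.
  void notifyObjectLoaded(RuntimeDyld &RTDyld,
                          const object::ObjectFile &Obj) override {
    // e.g. call RTDyld.mapSectionAddress(LocalAddr, RemoteAddr) for each
    // section so relocations are computed against the target addresses.
  }
};

A client that shares one such manager across several RuntimeDyld instances would finish each instance with finalizeWithMemoryManagerLocking(), so that only the outermost call flips memory permissions and inner instances can still apply relocations.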
diff --git a/contrib/llvm/include/llvm/IR/Attributes.td b/contrib/llvm/include/llvm/IR/Attributes.td index 797cd55..30249bb 100644 --- a/contrib/llvm/include/llvm/IR/Attributes.td +++ b/contrib/llvm/include/llvm/IR/Attributes.td @@ -189,4 +189,9 @@ class MergeRule<string F> { string MergeFunc = F; } +def : MergeRule<"setAND<LessPreciseFPMADAttr>">; +def : MergeRule<"setAND<NoInfsFPMathAttr>">; +def : MergeRule<"setAND<NoNansFPMathAttr>">; +def : MergeRule<"setAND<UnsafeFPMathAttr>">; +def : MergeRule<"setOR<NoImplicitFloatAttr>">; def : MergeRule<"adjustCallerSSPLevel">; diff --git a/contrib/llvm/include/llvm/IR/Function.h b/contrib/llvm/include/llvm/IR/Function.h index 2a98393..4f64cae 100644 --- a/contrib/llvm/include/llvm/IR/Function.h +++ b/contrib/llvm/include/llvm/IR/Function.h @@ -66,7 +66,8 @@ private: * bit 2 : HasPrologueData * bit 3 : HasPersonalityFn * bits 4-13 : CallingConvention - * bits 14-15 : [reserved] + * bits 14 : HasGC + * bits 15 : [reserved] */ /// Bits from GlobalObject::GlobalObjectSubclassData. @@ -220,9 +221,11 @@ public: /// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm /// to use during code generation. - bool hasGC() const; - const char *getGC() const; - void setGC(const char *Str); + bool hasGC() const { + return getSubclassDataFromValue() & (1<<14); + } + const std::string &getGC() const; + void setGC(const std::string Str); void clearGC(); /// @brief adds the attribute to the list of attributes. diff --git a/contrib/llvm/include/llvm/IR/IRBuilder.h b/contrib/llvm/include/llvm/IR/IRBuilder.h index a305054..1b75c60 100644 --- a/contrib/llvm/include/llvm/IR/IRBuilder.h +++ b/contrib/llvm/include/llvm/IR/IRBuilder.h @@ -178,10 +178,10 @@ public: void clearFastMathFlags() { FMF.clear(); } /// \brief Set the floating point math metadata to be used. - void SetDefaultFPMathTag(MDNode *FPMathTag) { DefaultFPMathTag = FPMathTag; } + void setDefaultFPMathTag(MDNode *FPMathTag) { DefaultFPMathTag = FPMathTag; } /// \brief Set the fast-math flags to be used with generated fp-math operators - void SetFastMathFlags(FastMathFlags NewFMF) { FMF = NewFMF; } + void setFastMathFlags(FastMathFlags NewFMF) { FMF = NewFMF; } //===--------------------------------------------------------------------===// // RAII helpers. diff --git a/contrib/llvm/include/llvm/IR/Intrinsics.td b/contrib/llvm/include/llvm/IR/Intrinsics.td index 5a95ddc..f67029a 100644 --- a/contrib/llvm/include/llvm/IR/Intrinsics.td +++ b/contrib/llvm/include/llvm/IR/Intrinsics.td @@ -575,7 +575,7 @@ def int_experimental_gc_statepoint : Intrinsic<[llvm_token_ty], def int_experimental_gc_result : Intrinsic<[llvm_any_ty], [llvm_token_ty], [IntrReadMem]>; -def int_experimental_gc_relocate : Intrinsic<[llvm_anyptr_ty], +def int_experimental_gc_relocate : Intrinsic<[llvm_any_ty], [llvm_token_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td index 54bcbd8..8023a9f 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td @@ -1507,6 +1507,60 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pshuf_d_128 : + GCCBuiltin<"__builtin_ia32_pshufd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_i16_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshuf_d_256 : + GCCBuiltin<"__builtin_ia32_pshufd256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_i16_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshuf_d_512 : + GCCBuiltin<"__builtin_ia32_pshufd512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_i16_ty, llvm_v16i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshufh_w_128 : + GCCBuiltin<"__builtin_ia32_pshufhw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshufh_w_256 : + GCCBuiltin<"__builtin_ia32_pshufhw256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshufh_w_512 : + GCCBuiltin<"__builtin_ia32_pshufhw512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v32i16_ty, llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshufl_w_128 : + GCCBuiltin<"__builtin_ia32_pshuflw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshufl_w_256 : + GCCBuiltin<"__builtin_ia32_pshuflw256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshufl_w_512 : + GCCBuiltin<"__builtin_ia32_pshuflw512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v32i16_ty, llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_shuf_f32x4_256 : GCCBuiltin<"__builtin_ia32_shuf_f32x4_256_mask">, Intrinsic<[llvm_v8f32_ty], @@ -1836,25 +1890,69 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">, Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8i32_ty], [IntrReadArgMem]>; - def int_x86_avx512_mask_loadu_ps_512 : GCCBuiltin<"__builtin_ia32_loadups512_mask">, - Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], - [IntrReadArgMem]>; - def int_x86_avx512_mask_loadu_pd_512 : GCCBuiltin<"__builtin_ia32_loadupd512_mask">, - Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], - [IntrReadArgMem]>; - def int_x86_avx512_mask_load_ps_512 : GCCBuiltin<"__builtin_ia32_loadaps512_mask">, - Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], - [IntrReadArgMem]>; - def int_x86_avx512_mask_load_pd_512 : GCCBuiltin<"__builtin_ia32_loadapd512_mask">, - Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], - [IntrReadArgMem]>; - def int_x86_avx512_mask_move_ss : GCCBuiltin<"__builtin_ia32_movss_mask">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_move_sd : GCCBuiltin<"__builtin_ia32_movsd_mask">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrNoMem]>; + def int_x86_avx512_mask_loadu_ps_128 : + GCCBuiltin<"__builtin_ia32_loadups128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_loadu_ps_256 : + GCCBuiltin<"__builtin_ia32_loadups256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_ptr_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_loadu_ps_512 : + GCCBuiltin<"__builtin_ia32_loadups512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrReadArgMem]>; + + def int_x86_avx512_mask_loadu_pd_128 : + GCCBuiltin<"__builtin_ia32_loadupd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_ptr_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_loadu_pd_256 : + GCCBuiltin<"__builtin_ia32_loadupd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_ptr_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_loadu_pd_512 : + GCCBuiltin<"__builtin_ia32_loadupd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + + def int_x86_avx512_mask_load_ps_128 : + GCCBuiltin<"__builtin_ia32_loadaps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_load_ps_256 : + GCCBuiltin<"__builtin_ia32_loadaps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_ptr_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_load_ps_512 : + GCCBuiltin<"__builtin_ia32_loadaps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrReadArgMem]>; + + def int_x86_avx512_mask_load_pd_128 : + GCCBuiltin<"__builtin_ia32_loadapd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_ptr_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_load_pd_256 : + GCCBuiltin<"__builtin_ia32_loadapd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_ptr_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_load_pd_512 : + GCCBuiltin<"__builtin_ia32_loadapd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + + def int_x86_avx512_mask_move_ss : + GCCBuiltin<"__builtin_ia32_movss_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, 
llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_move_sd : + GCCBuiltin<"__builtin_ia32_movsd_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; } // Conditional store ops @@ -2262,6 +2360,46 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_w_128 : GCCBuiltin<"__builtin_ia32_psllw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_w_256 : GCCBuiltin<"__builtin_ia32_psllw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_w_512 : GCCBuiltin<"__builtin_ia32_psllw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_wi_128 : GCCBuiltin<"__builtin_ia32_psllwi128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_wi_256 : GCCBuiltin<"__builtin_ia32_psllwi256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_wi_512 : GCCBuiltin<"__builtin_ia32_psllwi512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psllv16_hi : GCCBuiltin<"__builtin_ia32_psllv16hi_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psllv2_di : GCCBuiltin<"__builtin_ia32_psllv2di_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psllv32hi : GCCBuiltin<"__builtin_ia32_psllv32hi_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psllv4_di : GCCBuiltin<"__builtin_ia32_psllv4di_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psllv4_si : GCCBuiltin<"__builtin_ia32_psllv4si_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psllv8_hi : GCCBuiltin<"__builtin_ia32_psllv8hi_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psllv8_si : GCCBuiltin<"__builtin_ia32_psllv8si_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_d_128 : GCCBuiltin<"__builtin_ia32_psrad128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; @@ -2823,6 +2961,28 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrav16_hi : GCCBuiltin<"__builtin_ia32_psrav16hi_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrav32_hi : GCCBuiltin<"__builtin_ia32_psrav32hi_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrav4_si : GCCBuiltin<"__builtin_ia32_psrav4si_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrav8_hi : GCCBuiltin<"__builtin_ia32_psrav8hi_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrav8_si : GCCBuiltin<"__builtin_ia32_psrav8si_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrav_q_128 : GCCBuiltin<"__builtin_ia32_psravq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrav_q_256 : GCCBuiltin<"__builtin_ia32_psravq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrlv16_hi : GCCBuiltin<"__builtin_ia32_psrlv16hi_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; @@ -2844,6 +3004,83 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_psrlv8_si : GCCBuiltin<"__builtin_ia32_psrlv8si_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_prorv_d_128 : GCCBuiltin<"__builtin_ia32_prorvd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prorv_d_256 : GCCBuiltin<"__builtin_ia32_prorvd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prorv_d_512 : GCCBuiltin<"__builtin_ia32_prorvd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, + llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prorv_q_128 : GCCBuiltin<"__builtin_ia32_prorvq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prorv_q_256 : GCCBuiltin<"__builtin_ia32_prorvq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prorv_q_512 : GCCBuiltin<"__builtin_ia32_prorvq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_prol_d_128 : GCCBuiltin<"__builtin_ia32_prold128_mask">, + Intrinsic<[llvm_v4i32_ty] , [llvm_v4i32_ty, + llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prol_d_256 : GCCBuiltin<"__builtin_ia32_prold256_mask">, + Intrinsic<[llvm_v8i32_ty] , [llvm_v8i32_ty, + llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prol_d_512 : GCCBuiltin<"__builtin_ia32_prold512_mask">, + Intrinsic<[llvm_v16i32_ty] , [llvm_v16i32_ty, + llvm_i8_ty, 
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prol_q_128 : GCCBuiltin<"__builtin_ia32_prolq128_mask">, + Intrinsic<[llvm_v2i64_ty] , [llvm_v2i64_ty, + llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prol_q_256 : GCCBuiltin<"__builtin_ia32_prolq256_mask">, + Intrinsic<[llvm_v4i64_ty] , [llvm_v4i64_ty, + llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prol_q_512 : GCCBuiltin<"__builtin_ia32_prolq512_mask">, + Intrinsic<[llvm_v8i64_ty] , [llvm_v8i64_ty, + llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + + def int_x86_avx512_mask_prolv_d_128 : GCCBuiltin<"__builtin_ia32_prolvd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prolv_d_256 : GCCBuiltin<"__builtin_ia32_prolvd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prolv_d_512 : GCCBuiltin<"__builtin_ia32_prolvd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, + llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prolv_q_128 : GCCBuiltin<"__builtin_ia32_prolvq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prolv_q_256 : GCCBuiltin<"__builtin_ia32_prolvq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prolv_q_512 : GCCBuiltin<"__builtin_ia32_prolvq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pror_d_128 : GCCBuiltin<"__builtin_ia32_prord128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pror_d_256 : GCCBuiltin<"__builtin_ia32_prord256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pror_d_512 : GCCBuiltin<"__builtin_ia32_prord512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, + llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pror_q_128 : GCCBuiltin<"__builtin_ia32_prorq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pror_q_256 : GCCBuiltin<"__builtin_ia32_prorq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pror_q_512 : GCCBuiltin<"__builtin_ia32_prorq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + } // Gather ops @@ -4208,6 +4445,61 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx512_kortestc_w : GCCBuiltin<"__builtin_ia32_kortestchi">, Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_pmovsxb_d_128 : GCCBuiltin<"__builtin_ia32_pmovsxbd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_d_256 : GCCBuiltin<"__builtin_ia32_pmovsxbd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_d_512 : GCCBuiltin<"__builtin_ia32_pmovsxbd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i8_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_q_128 : GCCBuiltin<"__builtin_ia32_pmovsxbq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_q_256 : GCCBuiltin<"__builtin_ia32_pmovsxbq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_q_512 : GCCBuiltin<"__builtin_ia32_pmovsxbq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v16i8_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_w_128 : GCCBuiltin<"__builtin_ia32_pmovsxbw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_w_256 : GCCBuiltin<"__builtin_ia32_pmovsxbw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_w_512 : GCCBuiltin<"__builtin_ia32_pmovsxbw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i8_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxd_q_128 : GCCBuiltin<"__builtin_ia32_pmovsxdq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxd_q_256 : GCCBuiltin<"__builtin_ia32_pmovsxdq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxd_q_512 : GCCBuiltin<"__builtin_ia32_pmovsxdq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxw_d_128 : GCCBuiltin<"__builtin_ia32_pmovsxwd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxw_d_256 : GCCBuiltin<"__builtin_ia32_pmovsxwd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxw_d_512 : GCCBuiltin<"__builtin_ia32_pmovsxwd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i16_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxw_q_128 : GCCBuiltin<"__builtin_ia32_pmovsxwq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxw_q_256 : GCCBuiltin<"__builtin_ia32_pmovsxwq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxw_q_512 : GCCBuiltin<"__builtin_ia32_pmovsxwq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i16_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; } // Conversion ops @@ -5319,6 +5611,62 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx512_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_pmovzxb_d_128 : GCCBuiltin<"__builtin_ia32_pmovzxbd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_d_256 : GCCBuiltin<"__builtin_ia32_pmovzxbd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_d_512 : GCCBuiltin<"__builtin_ia32_pmovzxbd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i8_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_q_128 : GCCBuiltin<"__builtin_ia32_pmovzxbq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_q_256 : GCCBuiltin<"__builtin_ia32_pmovzxbq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_q_512 : GCCBuiltin<"__builtin_ia32_pmovzxbq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v16i8_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_w_128 : GCCBuiltin<"__builtin_ia32_pmovzxbw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_w_256 : GCCBuiltin<"__builtin_ia32_pmovzxbw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_w_512 : GCCBuiltin<"__builtin_ia32_pmovzxbw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i8_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxd_q_128 : GCCBuiltin<"__builtin_ia32_pmovzxdq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxd_q_256 : GCCBuiltin<"__builtin_ia32_pmovzxdq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxd_q_512 : GCCBuiltin<"__builtin_ia32_pmovzxdq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxw_d_128 : GCCBuiltin<"__builtin_ia32_pmovzxwd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxw_d_256 : GCCBuiltin<"__builtin_ia32_pmovzxwd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxw_d_512 : GCCBuiltin<"__builtin_ia32_pmovzxwd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i16_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxw_q_128 : GCCBuiltin<"__builtin_ia32_pmovzxwq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxw_q_256 : GCCBuiltin<"__builtin_ia32_pmovzxwq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxw_q_512 : GCCBuiltin<"__builtin_ia32_pmovzxwq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i16_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + } //Bitwise Ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
diff --git a/contrib/llvm/include/llvm/IR/LLVMContext.h b/contrib/llvm/include/llvm/IR/LLVMContext.h index c546fc3..56aa301 100644 --- a/contrib/llvm/include/llvm/IR/LLVMContext.h +++ b/contrib/llvm/include/llvm/IR/LLVMContext.h @@ -93,6 +93,17 @@ public: /// tag registered with an LLVMContext has an unique ID. uint32_t getOperandBundleTagID(StringRef Tag) const; + + /// Define the GC for a function + void setGC(const Function &Fn, std::string GCName); + + /// Return the GC for a function + const std::string &getGC(const Function &Fn); + + /// Remove the GC for a function + void deleteGC(const Function &Fn); + + typedef void (*InlineAsmDiagHandlerTy)(const SMDiagnostic&, void *Context, unsigned LocCookie); diff --git a/contrib/llvm/include/llvm/IR/Metadata.h b/contrib/llvm/include/llvm/IR/Metadata.h index 4a8557d..df8ce35 100644 --- a/contrib/llvm/include/llvm/IR/Metadata.h +++ b/contrib/llvm/include/llvm/IR/Metadata.h @@ -915,11 +915,21 @@ public: /// \brief Resolve cycles. /// /// Once all forward declarations have been resolved, force cycles to be - /// resolved. If \p AllowTemps is true, then any temporary metadata - /// is ignored, otherwise it asserts when encountering temporary metadata. + /// resolved. This interface is used when there are no more temporaries, + /// and thus unresolved nodes are part of cycles and no longer need RAUW + /// support. /// /// \pre No operands (or operands' operands, etc.) have \a isTemporary(). - void resolveCycles(bool AllowTemps = false); + void resolveCycles() { resolveRecursivelyImpl(/* AllowTemps */ false); } + + /// \brief Resolve cycles while ignoring temporaries. + /// + /// This drops RAUW support for any temporaries, which can no longer + /// be uniqued. + /// + void resolveNonTemporaries() { + resolveRecursivelyImpl(/* AllowTemps */ true); + } /// \brief Replace a temporary node with a permanent one. /// @@ -977,6 +987,11 @@ private: void decrementUnresolvedOperandCount(); unsigned countUnresolvedOperands(); + /// Resolve cycles recursively. If \p AllowTemps is true, then any temporary + /// metadata is ignored, otherwise it asserts when encountering temporary + /// metadata. + void resolveRecursivelyImpl(bool AllowTemps); + /// \brief Mutate this to be "uniqued". /// /// Mutate this so that \a isUniqued(). 
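Tying together the Function and LLVMContext changes above: the GC name now lives in a context-owned map, while the function itself carries only a "has GC" flag (bit 14 of its subclass data). A minimal usage sketch, assuming headers from this tree; the module and function names are illustrative:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("gc-demo", Ctx);
  Function *F = Function::Create(
      FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false),
      Function::ExternalLinkage, "f", &M);

  // setGC stores the string in the context-side map and sets the bit;
  // hasGC only tests the bit, so the common "no GC" case stays cheap.
  F->setGC("statepoint-example");
  if (F->hasGC())
    outs() << F->getGC() << "\n"; // prints "statepoint-example"
  F->clearGC();                   // drops the map entry and clears the bit
}

The design choice here is to move a rarely-used string out of every Function object, paying a map lookup only for functions that actually name a collector.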
diff --git a/contrib/llvm/include/llvm/InitializePasses.h b/contrib/llvm/include/llvm/InitializePasses.h index cb2b139..90fbc1d 100644 --- a/contrib/llvm/include/llvm/InitializePasses.h +++ b/contrib/llvm/include/llvm/InitializePasses.h @@ -132,7 +132,6 @@ void initializeEarlyCSELegacyPassPass(PassRegistry &); void initializeEliminateAvailableExternallyPass(PassRegistry&); void initializeExpandISelPseudosPass(PassRegistry&); void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&); -void initializeFunctionAttrsPass(PassRegistry&); void initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); void initializeGVNPass(PassRegistry&); @@ -227,6 +226,7 @@ void initializePostDomOnlyViewerPass(PassRegistry&); void initializePostDomPrinterPass(PassRegistry&); void initializePostDomViewerPass(PassRegistry&); void initializePostDominatorTreePass(PassRegistry&); +void initializePostOrderFunctionAttrsPass(PassRegistry&); void initializePostRASchedulerPass(PassRegistry&); void initializePostMachineSchedulerPass(PassRegistry&); void initializePrintFunctionPassWrapperPass(PassRegistry&); @@ -242,6 +242,7 @@ void initializeRegionOnlyPrinterPass(PassRegistry&); void initializeRegionOnlyViewerPass(PassRegistry&); void initializeRegionPrinterPass(PassRegistry&); void initializeRegionViewerPass(PassRegistry&); +void initializeReversePostOrderFunctionAttrsPass(PassRegistry&); void initializeRewriteStatepointsForGCPass(PassRegistry&); void initializeSafeStackPass(PassRegistry&); void initializeSCCPPass(PassRegistry&); diff --git a/contrib/llvm/include/llvm/LinkAllPasses.h b/contrib/llvm/include/llvm/LinkAllPasses.h index e4192bd..6a40ca3 100644 --- a/contrib/llvm/include/llvm/LinkAllPasses.h +++ b/contrib/llvm/include/llvm/LinkAllPasses.h @@ -157,7 +157,8 @@ namespace { (void) llvm::createPostDomTree(); (void) llvm::createInstructionNamerPass(); (void) llvm::createMetaRenamerPass(); - (void) llvm::createFunctionAttrsPass(); + (void) llvm::createPostOrderFunctionAttrsPass(); + (void) llvm::createReversePostOrderFunctionAttrsPass(); (void) llvm::createMergeFunctionsPass(); (void) llvm::createPrintModulePass(*(llvm::raw_ostream*)nullptr); (void) llvm::createPrintFunctionPass(*(llvm::raw_ostream*)nullptr); diff --git a/contrib/llvm/include/llvm/Linker/Linker.h b/contrib/llvm/include/llvm/Linker/Linker.h index dde3f73..2b051e6 100644 --- a/contrib/llvm/include/llvm/Linker/Linker.h +++ b/contrib/llvm/include/llvm/Linker/Linker.h @@ -67,10 +67,9 @@ public: DenseMap<unsigned, MDNode *> *ValIDToTempMDMap); }; -/// Create a new module with exported local functions renamed and promoted -/// for ThinLTO. -std::unique_ptr<Module> renameModuleForThinLTO(std::unique_ptr<Module> M, - const FunctionInfoIndex *Index); +/// Perform in-place global value handling on the given Module for +/// exported local functions renamed and promoted for ThinLTO. 
+bool renameModuleForThinLTO(Module &M, const FunctionInfoIndex *Index); } // End llvm namespace diff --git a/contrib/llvm/include/llvm/MC/MCExpr.h b/contrib/llvm/include/llvm/MC/MCExpr.h index 1d6bdef..f6ccdc09 100644 --- a/contrib/llvm/include/llvm/MC/MCExpr.h +++ b/contrib/llvm/include/llvm/MC/MCExpr.h @@ -290,6 +290,9 @@ public: VK_Hexagon_LD_PLT, VK_Hexagon_IE, VK_Hexagon_IE_GOT, + + VK_WebAssembly_FUNCTION, // Function table index, rather than virtual addr + VK_TPREL, VK_DTPREL }; diff --git a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h index cf2c3f1..8a3a6af 100644 --- a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -92,6 +92,7 @@ protected: MCSection *DwarfLocSection; MCSection *DwarfARangesSection; MCSection *DwarfRangesSection; + MCSection *DwarfMacinfoSection; // The pubnames section is no longer generated by default. The generation // can be enabled by a compiler flag. MCSection *DwarfPubNamesSection; @@ -245,6 +246,7 @@ public: MCSection *getDwarfLocSection() const { return DwarfLocSection; } MCSection *getDwarfARangesSection() const { return DwarfARangesSection; } MCSection *getDwarfRangesSection() const { return DwarfRangesSection; } + MCSection *getDwarfMacinfoSection() const { return DwarfMacinfoSection; } // DWARF5 Experimental Debug Info Sections MCSection *getDwarfAccelNamesSection() const { diff --git a/contrib/llvm/include/llvm/MC/MCStreamer.h b/contrib/llvm/include/llvm/MC/MCStreamer.h index 494f02d..04d143f 100644 --- a/contrib/llvm/include/llvm/MC/MCStreamer.h +++ b/contrib/llvm/include/llvm/MC/MCStreamer.h @@ -131,6 +131,10 @@ public: void finish() override; + /// Reset any state between object emissions, i.e. the equivalent of + /// MCStreamer's reset method. + virtual void reset(); + /// Callback used to implement the ldr= pseudo. /// Add a new entry to the constant pool for the current section and return an /// MCExpr that can be used to refer to the constant pool location. diff --git a/contrib/llvm/include/llvm/Object/COFF.h b/contrib/llvm/include/llvm/Object/COFF.h index 1b0e2e3..3e69c3e 100644 --- a/contrib/llvm/include/llvm/Object/COFF.h +++ b/contrib/llvm/include/llvm/Object/COFF.h @@ -858,6 +858,9 @@ public: std::error_code getExportRVA(uint32_t &Result) const; std::error_code getSymbolName(StringRef &Result) const; + std::error_code isForwarder(bool &Result) const; + std::error_code getForwardTo(StringRef &Result) const; + private: const export_directory_table_entry *ExportTable; uint32_t Index; diff --git a/contrib/llvm/include/llvm/Object/ELFObjectFile.h b/contrib/llvm/include/llvm/Object/ELFObjectFile.h index 5823848..5d826da 100644 --- a/contrib/llvm/include/llvm/Object/ELFObjectFile.h +++ b/contrib/llvm/include/llvm/Object/ELFObjectFile.h @@ -842,6 +842,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const { case ELF::EM_SPARC: case ELF::EM_SPARC32PLUS: return "ELF32-sparc"; + case ELF::EM_WEBASSEMBLY: + return "ELF32-wasm"; default: return "ELF32-unknown"; } @@ -861,6 +863,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const { return "ELF64-sparc"; case ELF::EM_MIPS: return "ELF64-mips"; + case ELF::EM_WEBASSEMBLY: + return "ELF64-wasm"; default: return "ELF64-unknown"; } @@ -908,6 +912,12 @@ unsigned ELFObjectFile<ELFT>::getArch() const { return IsLittleEndian ? 
Triple::sparcel : Triple::sparc; case ELF::EM_SPARCV9: return Triple::sparcv9; + case ELF::EM_WEBASSEMBLY: + switch (EF.getHeader()->e_ident[ELF::EI_CLASS]) { + case ELF::ELFCLASS32: return Triple::wasm32; + case ELF::ELFCLASS64: return Triple::wasm64; + default: return Triple::UnknownArch; + } default: return Triple::UnknownArch; diff --git a/contrib/llvm/include/llvm/Pass.h b/contrib/llvm/include/llvm/Pass.h index 3c4d838..99604cd 100644 --- a/contrib/llvm/include/llvm/Pass.h +++ b/contrib/llvm/include/llvm/Pass.h @@ -369,6 +369,10 @@ protected: /// @brief This is the storage for the -time-passes option. extern bool TimePassesIsEnabled; +/// isFunctionInPrintList - returns true if a function should be printed via +/// debugging options like -print-after-all/-print-before-all. +/// @brief Tells if the function IR should be printed by PrinterPass. +extern bool isFunctionInPrintList(StringRef FunctionName); } // End llvm namespace // Include support files that contain important APIs commonly used by Passes, diff --git a/contrib/llvm/include/llvm/ProfileData/CoverageMapping.h b/contrib/llvm/include/llvm/ProfileData/CoverageMapping.h index 3790e13..92a991e 100644 --- a/contrib/llvm/include/llvm/ProfileData/CoverageMapping.h +++ b/contrib/llvm/include/llvm/ProfileData/CoverageMapping.h @@ -20,13 +20,34 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator.h" +#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/raw_ostream.h" #include <system_error> #include <tuple> namespace llvm { +namespace coverage { +enum class coveragemap_error { + success = 0, + eof, + no_data_found, + unsupported_version, + truncated, + malformed +}; +} // end of coverage namespace. +} + +namespace std { +template <> +struct is_error_code_enum<llvm::coverage::coveragemap_error> : std::true_type { +}; +} + +namespace llvm { class IndexedInstrProfReader; namespace coverage { @@ -35,8 +56,6 @@ class CoverageMappingReader; class CoverageMapping; struct CounterExpressions; -enum CoverageMappingVersion { CoverageMappingVersion1 }; - /// \brief A Counter is an abstract value that describes how to compute the /// execution count for a region of code using the collected profile count data. struct Counter { @@ -454,6 +473,76 @@ public: CoverageData getCoverageForExpansion(const ExpansionRecord &Expansion); }; +const std::error_category &coveragemap_category(); + +inline std::error_code make_error_code(coveragemap_error E) { + return std::error_code(static_cast<int>(E), coveragemap_category()); +} + +// Profile coverage map has the following layout: +// [CoverageMapFileHeader] +// [ArrayStart] +// [CovMapFunctionRecord] +// [CovMapFunctionRecord] +// ... +// [ArrayEnd] +// [Encoded Region Mapping Data] +LLVM_PACKED_START +template <class IntPtrT> struct CovMapFunctionRecord { +#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" + + // Return the structural hash associated with the function. + template <support::endianness Endian> uint64_t getFuncHash() const { + return support::endian::byte_swap<uint64_t, Endian>(FuncHash); + } + // Return the coverage map data size for the function. + template <support::endianness Endian> uint32_t getDataSize() const { + return support::endian::byte_swap<uint32_t, Endian>(DataSize); + } + // Return the function lookup key. The value is considered opaque.
+ template <support::endianness Endian> IntPtrT getFuncNameRef() const { + return support::endian::byte_swap<IntPtrT, Endian>(NamePtr); + } + // Return the PGO name of the function. + template <support::endianness Endian> + std::error_code getFuncName(InstrProfSymtab &ProfileNames, + StringRef &FuncName) const { + IntPtrT NameRef = getFuncNameRef<Endian>(); + uint32_t NameS = support::endian::byte_swap<uint32_t, Endian>(NameSize); + FuncName = ProfileNames.getFuncName(NameRef, NameS); + if (NameS && FuncName.empty()) + return coveragemap_error::malformed; + return std::error_code(); + } +}; +// Per module coverage mapping data header, i.e. CoverageMapFileHeader +// documented above. +struct CovMapHeader { +#define COVMAP_HEADER(Type, LLVMType, Name, Init) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" + template <support::endianness Endian> uint32_t getNRecords() const { + return support::endian::byte_swap<uint32_t, Endian>(NRecords); + } + template <support::endianness Endian> uint32_t getFilenamesSize() const { + return support::endian::byte_swap<uint32_t, Endian>(FilenamesSize); + } + template <support::endianness Endian> uint32_t getCoverageSize() const { + return support::endian::byte_swap<uint32_t, Endian>(CoverageSize); + } + template <support::endianness Endian> uint32_t getVersion() const { + return support::endian::byte_swap<uint32_t, Endian>(Version); + } +}; + +LLVM_PACKED_END + +enum CoverageMappingVersion { + CoverageMappingVersion1 = 0, + // The current version is Version1. + CoverageMappingCurrentVersion = INSTR_PROF_COVMAP_VERSION +}; + } // end namespace coverage /// \brief Provide DenseMapInfo for CounterExpression @@ -484,26 +573,6 @@ template<> struct DenseMapInfo<coverage::CounterExpression> { } }; -const std::error_category &coveragemap_category(); - -enum class coveragemap_error { - success = 0, - eof, - no_data_found, - unsupported_version, - truncated, - malformed -}; - -inline std::error_code make_error_code(coveragemap_error E) { - return std::error_code(static_cast<int>(E), coveragemap_category()); -} - } // end namespace llvm -namespace std { -template <> -struct is_error_code_enum<llvm::coveragemap_error> : std::true_type {}; -} - #endif // LLVM_PROFILEDATA_COVERAGEMAPPING_H_ diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProf.h b/contrib/llvm/include/llvm/ProfileData/InstrProf.h index 49569d8..c84d8d2 100644 --- a/contrib/llvm/include/llvm/ProfileData/InstrProf.h +++ b/contrib/llvm/include/llvm/ProfileData/InstrProf.h @@ -30,7 +30,6 @@ #include <system_error> #include <vector> -#define INSTR_PROF_INDEX_VERSION 3 namespace llvm { class Function; @@ -66,7 +65,8 @@ inline StringRef getInstrProfValueProfFuncName() { /// Return the name of the section containing function coverage mapping /// data. inline StringRef getInstrProfCoverageSectionName(bool AddSegment) { - return AddSegment ? "__DATA,__llvm_covmap" : "__llvm_covmap"; + return AddSegment ? "__DATA," INSTR_PROF_COVMAP_SECT_NAME_STR + : INSTR_PROF_COVMAP_SECT_NAME_STR; } /// Return the name prefix of variables containing instrumented function names. @@ -89,6 +89,12 @@ inline StringRef getCoverageMappingVarName() { return "__llvm_coverage_mapping"; } +/// Return the name of the internal variable recording the array +/// of PGO name vars referenced by the coverage mapping. The owning +/// functions of those names are not emitted by FE (e.g., unused inline +/// functions).
+inline StringRef getCoverageNamesVarName() { return "__llvm_coverage_names"; } + /// Return the name of function that registers all the per-function control /// data at program startup time by calling __llvm_register_function. This /// function has internal linkage and is called by __llvm_profile_init @@ -349,11 +355,14 @@ struct InstrProfValueSiteRecord { return left.Value < right.Value; }); } + /// Sort ValueData Descending by Count + inline void sortByCount(); /// Merge data from another InstrProfValueSiteRecord /// Optionally scale merged counts by \p Weight. - instrprof_error mergeValueData(InstrProfValueSiteRecord &Input, - uint64_t Weight = 1); + instrprof_error merge(InstrProfValueSiteRecord &Input, uint64_t Weight = 1); + /// Scale up value profile data counts. + instrprof_error scale(uint64_t Weight); }; /// Profiling information for a single function. @@ -396,6 +405,19 @@ struct InstrProfRecord { /// Optionally scale merged counts by \p Weight. instrprof_error merge(InstrProfRecord &Other, uint64_t Weight = 1); + /// Scale up profile counts (including value profile data) by + /// \p Weight. + instrprof_error scale(uint64_t Weight); + + /// Sort value profile data (per site) by count. + void sortValueData() { + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { + std::vector<InstrProfValueSiteRecord> &SiteRecords = + getValueSitesForKind(Kind); + for (auto &SR : SiteRecords) + SR.sortByCount(); + } + } /// Clear value data entries void clearValueData() { for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) @@ -430,6 +452,8 @@ private: // Scale merged value counts by \p Weight. instrprof_error mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src, uint64_t Weight); + // Scale up value profile data count. + instrprof_error scaleValueProfData(uint32_t ValueKind, uint64_t Weight); }; uint32_t InstrProfRecord::getNumValueKinds() const { @@ -497,11 +521,22 @@ inline support::endianness getHostEndianness() { #define INSTR_PROF_VALUE_PROF_DATA #include "llvm/ProfileData/InstrProfData.inc" - /* - * Initialize the record for runtime value profile data. - * Return 0 if the initialization is successful, otherwise - * return 1. - */ +void InstrProfValueSiteRecord::sortByCount() { + ValueData.sort( + [](const InstrProfValueData &left, const InstrProfValueData &right) { + return left.Count > right.Count; + }); + // Now truncate + size_t max_s = INSTR_PROF_MAX_NUM_VAL_PER_SITE; + if (ValueData.size() > max_s) + ValueData.resize(max_s); +} + +/* +* Initialize the record for runtime value profile data. +* Return 0 if the initialization is successful, otherwise +* return 1. +*/ int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord, const uint16_t *NumValueSites, ValueProfNode **Nodes); @@ -597,31 +632,6 @@ struct Header { } // end namespace RawInstrProf -namespace coverage { - -// Profile coverage map has the following layout: -// [CoverageMapFileHeader] -// [ArrayStart] -// [CovMapFunctionRecord] -// [CovMapFunctionRecord] -// ... -// [ArrayEnd] -// [Encoded Region Mapping Data] -LLVM_PACKED_START -template <class IntPtrT> struct CovMapFunctionRecord { - #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name; - #include "llvm/ProfileData/InstrProfData.inc" -}; -// Per module coverage mapping data header, i.e. CoverageMapFileHeader -// documented above. 
-struct CovMapHeader { -#define COVMAP_HEADER(Type, LLVMType, Name, Init) Type Name; -#include "llvm/ProfileData/InstrProfData.inc" -}; - -LLVM_PACKED_END -} - } // end namespace llvm namespace std { diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc b/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc index 3a7c0c5..33c7d94 100644 --- a/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -28,7 +28,7 @@ * * Examples of how the template is used to instantiate structure definition: * 1. To declare a structure: - * + * * struct ProfData { * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ * Type Name; @@ -155,7 +155,7 @@ VALUE_PROF_KIND(IPVK_Last, IPVK_IndirectCallTarget) #endif COVMAP_FUNC_RECORD(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), \ NamePtr, llvm::ConstantExpr::getBitCast(NamePtr, \ - llvm::Type::getInt8PtrTy(Ctx))) + llvm::Type::getInt8PtrTy(Ctx))) COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), NameSize, \ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx),\ NameValue.size())) @@ -182,7 +182,7 @@ COVMAP_HEADER(uint32_t, Int32Ty, FilenamesSize, \ COVMAP_HEADER(uint32_t, Int32Ty, CoverageSize, \ llvm::ConstantInt::get(Int32Ty, CoverageMappingSize)) COVMAP_HEADER(uint32_t, Int32Ty, Version, \ - llvm::ConstantInt::get(Int32Ty, CoverageMappingVersion1)) + llvm::ConstantInt::get(Int32Ty, CoverageMappingCurrentVersion)) #undef COVMAP_HEADER /* COVMAP_HEADER end. */ @@ -190,7 +190,8 @@ COVMAP_HEADER(uint32_t, Int32Ty, Version, \ #ifdef INSTR_PROF_VALUE_PROF_DATA #define INSTR_PROF_DATA_DEFINED -/*! +#define INSTR_PROF_MAX_NUM_VAL_PER_SITE 255 +/*! * This is the header of the data structure that defines the on-disk * layout of the value profile data of a particular kind for one function. */ @@ -202,7 +203,7 @@ typedef struct ValueProfRecord { * otherwise the record for this kind won't be emitted. */ uint32_t NumValueSites; - /* + /* * The first element of the array that stores the number of profiled * values for each value site. The size of the array is NumValueSites. * Since NumValueSites is greater than zero, there is at least one @@ -226,7 +227,7 @@ typedef struct ValueProfRecord { * \brief Return the number of value sites. */ uint32_t getNumValueSites() const { return NumValueSites; } - /*! + /*! * \brief Read data from this record and save it to Record. */ void deserializeTo(InstrProfRecord &Record, @@ -247,10 +248,10 @@ typedef struct ValueProfRecord { typedef struct ValueProfData { /* * Total size in bytes including this field. It must be a multiple - * of sizeof(uint64_t). + * of sizeof(uint64_t). */ uint32_t TotalSize; - /* + /* * The number of value profile kinds that have value profile data. * In this implementation, a value profile kind is considered to * have profile data if the number of value profile sites for the @@ -260,7 +261,7 @@ typedef struct ValueProfData { */ uint32_t NumValueKinds; - /* + /* * Following are a sequence of variable length records. The prefix/header * of each record is defined by ValueProfRecord type. The number of * records is NumValueKinds.
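Editorial aside: the new INSTR_PROF_MAX_NUM_VAL_PER_SITE cap and InstrProfValueSiteRecord::sortByCount above combine into a simple policy: keep each site's value records sorted hottest-first and drop everything past the cap. A rough standalone equivalent using std::vector (the real code sorts a std::list in place; the type and function names here are illustrative):

#include <algorithm>
#include <cstdint>
#include <vector>

struct ValueData {
  uint64_t Value;
  uint64_t Count;
};

constexpr size_t MaxValuesPerSite = 255; // INSTR_PROF_MAX_NUM_VAL_PER_SITE

// Keep only the hottest values recorded for one profiling site.
void sortByCount(std::vector<ValueData> &Site) {
  std::stable_sort(Site.begin(), Site.end(),
                   [](const ValueData &L, const ValueData &R) {
                     return L.Count > R.Count; // descending by count
                   });
  if (Site.size() > MaxValuesPerSite)
    Site.resize(MaxValuesPerSite); // discard the cold tail
}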
@@ -314,7 +315,7 @@ typedef struct ValueProfData { #endif } ValueProfData; -/* +/* * The closure is designed to abstract away two types of value profile data: * - InstrProfRecord which is the primary data structure used to * represent profile data in host tools (reader, writer, and profile-use) @@ -335,7 +336,7 @@ typedef struct ValueProfRecordClosure { uint32_t (*GetNumValueData)(const void *Record, uint32_t VKind); uint32_t (*GetNumValueDataForSite)(const void *R, uint32_t VK, uint32_t S); - /* + /* * After extracting the value profile data from the value profile record, * this method is used to map the in-memory value to on-disk value. If * the method is null, value will be written out untranslated. @@ -346,7 +347,7 @@ typedef struct ValueProfRecordClosure { ValueProfData *(*AllocValueProfData)(size_t TotalSizeInBytes); } ValueProfRecordClosure; -/* +/* * A wrapper struct that represents value profile runtime data. * Like InstrProfRecord class which is used by profiling host tools, * ValueProfRuntimeRecord also implements the abstract interfaces defined in @@ -384,7 +385,7 @@ serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record, uint32_t getNumValueKindsRT(const void *R); #undef INSTR_PROF_VALUE_PROF_DATA -#endif /* INSTR_PROF_VALUE_PROF_DATA */ +#endif /* INSTR_PROF_VALUE_PROF_DATA */ #ifdef INSTR_PROF_COMMON_API_IMPL @@ -412,7 +413,7 @@ uint32_t getValueProfRecordHeaderSize(uint32_t NumValueSites) { return Size; } -/*! +/*! * \brief Return the total size of the value profile record including the * header and the value data. */ @@ -432,7 +433,7 @@ InstrProfValueData *getValueProfRecordValueData(ValueProfRecord *This) { This->NumValueSites)); } -/*! +/*! * \brief Return the total number of value data for \c This record. */ INSTR_PROF_INLINE @@ -444,7 +445,7 @@ uint32_t getValueProfRecordNumValueData(ValueProfRecord *This) { return NumValueData; } -/*! +/*! * \brief Use this method to advance to the next \c This \c ValueProfRecord. */ INSTR_PROF_INLINE @@ -465,7 +466,7 @@ ValueProfRecord *getFirstValueProfRecord(ValueProfData *This) { /* Closure based interfaces. */ -/*! +/*! * Return the total size in bytes of the on-disk value profile data * given the data stored in Record. */ @@ -535,7 +536,7 @@ ValueProfData *serializeValueProfDataFrom(ValueProfRecordClosure *Closure, return VPD; } -/* +/* * The value profiler runtime library stores the value profile data * for a given function in \c NumValueSites and \c Nodes structures. * \c ValueProfRuntimeRecord class is used to encapsulate the runtime @@ -639,7 +640,7 @@ static ValueProfRecordClosure RTRecordClosure = {0, getValueForSiteRT, allocValueProfDataRT}; -/* +/* * Return the size of ValueProfData structure to store data * recorded in the runtime record. */ @@ -648,7 +649,7 @@ uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record) { return getValueProfDataSize(&RTRecordClosure); } -/* +/* * Return a ValueProfData instance that stores the data collected * from runtime. If \c DstData is provided by the caller, the value * profile data will be stored in *DstData and DstData is returned, @@ -696,18 +697,31 @@ serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record, /* Raw profile format version. */ #define INSTR_PROF_RAW_VERSION 2 +#define INSTR_PROF_INDEX_VERSION 3 +#define INSTR_PROF_COVMAP_VERSION 0 + +/* Profile version is always of type uint64_t. Reserve the upper 8 bits in the + * version for other variants of profile. We set the lowest bit of the upper 8 + * bits (i.e.
bit 56) to 1 to indicate if this is an IR-level instrumentation + * generated profile, and 0 if this is a Clang FE generated profile. + */ +#define VARIANT_MASKS_ALL 0xff00000000000000ULL +#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) /* Runtime section names and name strings. */ #define INSTR_PROF_DATA_SECT_NAME __llvm_prf_data #define INSTR_PROF_NAME_SECT_NAME __llvm_prf_names #define INSTR_PROF_CNTS_SECT_NAME __llvm_prf_cnts +#define INSTR_PROF_COVMAP_SECT_NAME __llvm_covmap -#define INSTR_PROF_DATA_SECT_NAME_STR \ - INSTR_PROF_QUOTE(INSTR_PROF_DATA_SECT_NAME) -#define INSTR_PROF_NAME_SECT_NAME_STR \ - INSTR_PROF_QUOTE(INSTR_PROF_NAME_SECT_NAME) -#define INSTR_PROF_CNTS_SECT_NAME_STR \ - INSTR_PROF_QUOTE(INSTR_PROF_CNTS_SECT_NAME) +#define INSTR_PROF_DATA_SECT_NAME_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_DATA_SECT_NAME) +#define INSTR_PROF_NAME_SECT_NAME_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_NAME_SECT_NAME) +#define INSTR_PROF_CNTS_SECT_NAME_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_CNTS_SECT_NAME) +#define INSTR_PROF_COVMAP_SECT_NAME_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_SECT_NAME) /* Macros to define start/stop section symbol for a given * section on Linux. For instance @@ -751,4 +765,3 @@ typedef struct ValueProfNode { #else #undef INSTR_PROF_DATA_DEFINED #endif - diff --git a/contrib/llvm/include/llvm/ProfileData/SampleProf.h b/contrib/llvm/include/llvm/ProfileData/SampleProf.h index 8df3fe8..6c39cf9 100644 --- a/contrib/llvm/include/llvm/ProfileData/SampleProf.h +++ b/contrib/llvm/include/llvm/ProfileData/SampleProf.h @@ -140,16 +140,9 @@ public: /// around unsigned integers. sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) { bool Overflowed; - if (Weight > 1) { - S = SaturatingMultiply(S, Weight, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - } - NumSamples = SaturatingAdd(NumSamples, S, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - - return sampleprof_error::success; + NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed); + return Overflowed ? sampleprof_error::counter_overflow + : sampleprof_error::success; } /// Add called function \p F with samples \p S. @@ -161,16 +154,10 @@ public: uint64_t Weight = 1) { uint64_t &TargetSamples = CallTargets[F]; bool Overflowed; - if (Weight > 1) { - S = SaturatingMultiply(S, Weight, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - } - TargetSamples = SaturatingAdd(TargetSamples, S, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - - return sampleprof_error::success; + TargetSamples = + SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed); + return Overflowed ? sampleprof_error::counter_overflow + : sampleprof_error::success; } /// Return true if this sample record contains function calls. @@ -215,29 +202,17 @@ public: void dump() const; sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) { bool Overflowed; - if (Weight > 1) { - Num = SaturatingMultiply(Num, Weight, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - } - TotalSamples = SaturatingAdd(TotalSamples, Num, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - - return sampleprof_error::success; + TotalSamples = + SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed); + return Overflowed ?
sampleprof_error::counter_overflow + : sampleprof_error::success; } sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { bool Overflowed; - if (Weight > 1) { - Num = SaturatingMultiply(Num, Weight, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - } - TotalHeadSamples = SaturatingAdd(TotalHeadSamples, Num, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - - return sampleprof_error::success; + TotalHeadSamples = + SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed); + return Overflowed ? sampleprof_error::counter_overflow + : sampleprof_error::success; } sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator, uint64_t Num, uint64_t Weight = 1) { diff --git a/contrib/llvm/include/llvm/Support/Allocator.h b/contrib/llvm/include/llvm/Support/Allocator.h index c608736..043d823 100644 --- a/contrib/llvm/include/llvm/Support/Allocator.h +++ b/contrib/llvm/include/llvm/Support/Allocator.h @@ -187,6 +187,7 @@ public: /// \brief Deallocate all but the current slab and reset the current pointer /// to the beginning of it, freeing all memory allocated so far. void Reset() { + // Deallocate all but the first slab, and deallocate all custom-sized slabs. DeallocateCustomSizedSlabs(); CustomSizedSlabs.clear(); @@ -198,7 +199,7 @@ public: CurPtr = (char *)Slabs.front(); End = CurPtr + SlabSize; - // Deallocate all but the first slab, and deallocate all custom-sized slabs. + __asan_poison_memory_region(*Slabs.begin(), computeSlabSize(0)); DeallocateSlabs(std::next(Slabs.begin()), Slabs.end()); Slabs.erase(std::next(Slabs.begin()), Slabs.end()); } diff --git a/contrib/llvm/include/llvm/Support/COFF.h b/contrib/llvm/include/llvm/Support/COFF.h index 0162175..0245632 100644 --- a/contrib/llvm/include/llvm/Support/COFF.h +++ b/contrib/llvm/include/llvm/Support/COFF.h @@ -656,6 +656,15 @@ namespace COFF { } }; + enum CodeViewLine : unsigned { + CVL_LineNumberStartBits = 24, + CVL_LineNumberEndDeltaBits = 7, + CVL_LineNumberEndDeltaMask = (1U << CVL_LineNumberEndDeltaBits) - 1, + CVL_MaxLineNumber = (1U << CVL_LineNumberStartBits) - 1, + CVL_IsStatement = 1U << 31, + CVL_MaxColumnNumber = UINT16_MAX, + }; + enum CodeViewIdentifiers { DEBUG_LINE_TABLES_HAVE_COLUMN_RECORDS = 0x1, DEBUG_SECTION_MAGIC = 0x4, diff --git a/contrib/llvm/include/llvm/Support/ELF.h b/contrib/llvm/include/llvm/Support/ELF.h index 97708a7..e24420f 100644 --- a/contrib/llvm/include/llvm/Support/ELF.h +++ b/contrib/llvm/include/llvm/Support/ELF.h @@ -309,7 +309,12 @@ enum { EM_COOL = 217, // iCelero CoolEngine EM_NORC = 218, // Nanoradio Optimized RISC EM_CSR_KALIMBA = 219, // CSR Kalimba architecture family - EM_AMDGPU = 224 // AMD GPU architecture + EM_AMDGPU = 224, // AMD GPU architecture + + // A request has been made to the maintainer of the official registry for + // such numbers for an official value for WebAssembly. As soon as one is + // allocated, this enum will be updated to use it. + EM_WEBASSEMBLY = 0x4157, // WebAssembly architecture }; // Object file classes. @@ -594,6 +599,11 @@ enum { #include "ELFRelocs/Sparc.def" }; +// ELF Relocation types for WebAssembly +enum { +#include "ELFRelocs/WebAssembly.def" +}; + #undef ELF_RELOC // Section header. 
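Editorial aside: the CodeViewLine constants added to COFF.h above describe a packed 32-bit CodeView line entry: the start line in the low 24 bits, a 7-bit delta to the end line above that, and an is-statement flag in bit 31. A hedged sketch of packing and unpacking such a word; the helper names are mine, not LLVM's:

#include <cassert>
#include <cstdint>

constexpr unsigned LineNumberStartBits = 24;    // CVL_LineNumberStartBits
constexpr unsigned LineNumberEndDeltaBits = 7;  // CVL_LineNumberEndDeltaBits
constexpr uint32_t LineNumberEndDeltaMask =
    (1u << LineNumberEndDeltaBits) - 1;
constexpr uint32_t MaxLineNumber = (1u << LineNumberStartBits) - 1;
constexpr uint32_t IsStatement = 1u << 31;      // CVL_IsStatement

// Pack a start line, end-line delta, and statement flag into one word.
uint32_t packLine(uint32_t Start, uint32_t EndDelta, bool Statement) {
  assert(Start <= MaxLineNumber && EndDelta <= LineNumberEndDeltaMask);
  uint32_t V = Start | (EndDelta << LineNumberStartBits);
  return Statement ? (V | IsStatement) : V;
}

uint32_t startLine(uint32_t V) { return V & MaxLineNumber; }
uint32_t endDelta(uint32_t V) {
  return (V >> LineNumberStartBits) & LineNumberEndDeltaMask;
}
bool isStatement(uint32_t V) { return (V & IsStatement) != 0; }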
@@ -1024,7 +1034,10 @@ enum { PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM = 0x60000000, PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT = 0x60000001, PT_AMDGPU_HSA_LOAD_READONLY_AGENT = 0x60000002, - PT_AMDGPU_HSA_LOAD_CODE_AGENT = 0x60000003 + PT_AMDGPU_HSA_LOAD_CODE_AGENT = 0x60000003, + + // WebAssembly program header types. + PT_WEBASSEMBLY_FUNCTIONS = PT_LOPROC + 0, // Function definitions. }; // Segment flag bits. diff --git a/contrib/llvm/include/llvm/Support/ELFRelocs/WebAssembly.def b/contrib/llvm/include/llvm/Support/ELFRelocs/WebAssembly.def new file mode 100644 index 0000000..9a34349 --- /dev/null +++ b/contrib/llvm/include/llvm/Support/ELFRelocs/WebAssembly.def @@ -0,0 +1,8 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_WEBASSEMBLY_NONE, 0) +ELF_RELOC(R_WEBASSEMBLY_DATA, 1) +ELF_RELOC(R_WEBASSEMBLY_FUNCTION, 2) diff --git a/contrib/llvm/include/llvm/Support/GenericDomTree.h b/contrib/llvm/include/llvm/Support/GenericDomTree.h index 8751f27..8bae582 100644 --- a/contrib/llvm/include/llvm/Support/GenericDomTree.h +++ b/contrib/llvm/include/llvm/Support/GenericDomTree.h @@ -724,25 +724,17 @@ public: if (!this->IsPostDominators) { // Initialize root NodeT *entry = TraitsTy::getEntryNode(&F); - this->Roots.push_back(entry); - this->IDoms[entry] = nullptr; - this->DomTreeNodes[entry] = nullptr; + addRoot(entry); Calculate<FT, NodeT *>(*this, F); } else { // Initialize the roots list for (typename TraitsTy::nodes_iterator I = TraitsTy::nodes_begin(&F), E = TraitsTy::nodes_end(&F); - I != E; ++I) { + I != E; ++I) if (TraitsTy::child_begin(&*I) == TraitsTy::child_end(&*I)) addRoot(&*I); - // Prepopulate maps so that we don't get iterator invalidation issues - // later. - this->IDoms[&*I] = nullptr; - this->DomTreeNodes[&*I] = nullptr; - } - Calculate<FT, Inverse<NodeT *>>(*this, F); } } diff --git a/contrib/llvm/include/llvm/Support/MathExtras.h b/contrib/llvm/include/llvm/Support/MathExtras.h index 8111aee..408ae3c 100644 --- a/contrib/llvm/include/llvm/Support/MathExtras.h +++ b/contrib/llvm/include/llvm/Support/MathExtras.h @@ -717,6 +717,25 @@ SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) { return Z; } +/// \brief Multiply two unsigned integers, X and Y, and add the unsigned +/// integer A to the product. Clamp the result to the maximum representable +/// value of T on overflow. ResultOverflowed indicates if the result is larger +/// than the maximum representable value of type T. +/// Note that this is purely a convenience function as there is no distinction +/// as to where overflow occurred in a 'fused' multiply-add for unsigned numbers. +template <typename T> +typename std::enable_if<std::is_unsigned<T>::value, T>::type +SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) { + bool Dummy; + bool &Overflowed = ResultOverflowed ?
*ResultOverflowed : Dummy; + + T Product = SaturatingMultiply(X, Y, &Overflowed); + if (Overflowed) + return Product; + + return SaturatingAdd(A, Product, &Overflowed); +} + extern const float huge_valf; } // End llvm namespace diff --git a/contrib/llvm/include/llvm/Transforms/IPO.h b/contrib/llvm/include/llvm/Transforms/IPO.h index 0c374a0..78d2fad 100644 --- a/contrib/llvm/include/llvm/Transforms/IPO.h +++ b/contrib/llvm/include/llvm/Transforms/IPO.h @@ -183,12 +183,20 @@ ModulePass *createBlockExtractorPass(); ModulePass *createStripDeadPrototypesPass(); //===----------------------------------------------------------------------===// -/// createFunctionAttrsPass - This pass discovers functions that do not access -/// memory, or only read memory, and gives them the readnone/readonly attribute. -/// It also discovers function arguments that are not captured by the function -/// and marks them with the nocapture attribute. +/// createPostOrderFunctionAttrsPass - This pass walks SCCs of the call graph +/// in post-order to deduce and propagate function attributes. It can discover +/// functions that do not access memory, or only read memory, and give them the +/// readnone/readonly attribute. It also discovers function arguments that are +/// not captured by the function and marks them with the nocapture attribute. /// -Pass *createFunctionAttrsPass(); +Pass *createPostOrderFunctionAttrsPass(); + +//===----------------------------------------------------------------------===// +/// createReversePostOrderFunctionAttrsPass - This pass walks SCCs of the call +/// graph in RPO to deduce and propagate function attributes. Currently it +/// only handles synthesizing norecurse attributes. +/// +Pass *createReversePostOrderFunctionAttrsPass(); //===----------------------------------------------------------------------===// /// createMergeFunctionsPass - This pass discovers identical functions and diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h index 92a1d52..4f006f2 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -147,42 +147,12 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueMapTypeRemapper *TypeMapper = nullptr, ValueMaterializer *Materializer = nullptr); -/// A helper class used with CloneAndPruneIntoFromInst to change the default -/// behavior while instructions are being cloned. -class CloningDirector { -public: - /// This enumeration describes the way CloneAndPruneIntoFromInst should - /// proceed after the CloningDirector has examined an instruction. - enum CloningAction { - ///< Continue cloning the instruction (default behavior). - CloneInstruction, - ///< Skip this instruction but continue cloning the current basic block. - SkipInstruction, - ///< Skip this instruction and stop cloning the current basic block. - StopCloningBB, - ///< Don't clone the terminator but clone the current block's successors. - CloneSuccessors - }; - - virtual ~CloningDirector() {} - - /// Subclasses must override this function to customize cloning behavior. 
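Editorial aside: SaturatingMultiplyAdd in the MathExtras.h hunk above is what lets the SampleProf.h accumulators collapse to a single call plus one ternary. A standalone sketch of the same clamping semantics, specialized to uint64_t rather than templated as in the real header:

#include <cstdint>
#include <limits>

// Clamp to the maximum representable value on overflow, reporting
// whether clamping happened, in the spirit of the MathExtras helpers.
uint64_t saturatingAdd(uint64_t X, uint64_t Y, bool *Overflowed) {
  const uint64_t Max = std::numeric_limits<uint64_t>::max();
  *Overflowed = X > Max - Y;
  return *Overflowed ? Max : X + Y;
}

uint64_t saturatingMultiply(uint64_t X, uint64_t Y, bool *Overflowed) {
  const uint64_t Max = std::numeric_limits<uint64_t>::max();
  *Overflowed = Y != 0 && X > Max / Y;
  return *Overflowed ? Max : X * Y;
}

uint64_t saturatingMultiplyAdd(uint64_t X, uint64_t Y, uint64_t A,
                               bool *Overflowed) {
  uint64_t Product = saturatingMultiply(X, Y, Overflowed);
  if (*Overflowed)
    return Product; // already clamped; adding cannot help
  return saturatingAdd(A, Product, Overflowed);
}

Folding the overflow bookkeeping into one helper is exactly what turns each SampleProf accumulator into "one call, one ternary" in the hunks above.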
- virtual CloningAction handleInstruction(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) = 0; - - virtual ValueMapTypeRemapper *getTypeRemapper() { return nullptr; } - virtual ValueMaterializer *getValueMaterializer() { return nullptr; } -}; - void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, - SmallVectorImpl<ReturnInst*> &Returns, - const char *NameSuffix = "", - ClonedCodeInfo *CodeInfo = nullptr, - CloningDirector *Director = nullptr); - + SmallVectorImpl<ReturnInst *> &Returns, + const char *NameSuffix = "", + ClonedCodeInfo *CodeInfo = nullptr); /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. The diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Local.h b/contrib/llvm/include/llvm/Transforms/Utils/Local.h index 81b376f..911c6f1 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/Local.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/Local.h @@ -42,6 +42,7 @@ class TargetLibraryInfo; class TargetTransformInfo; class DIBuilder; class DominatorTree; +class LazyValueInfo; template<typename T> class SmallVectorImpl; @@ -303,7 +304,7 @@ void removeUnwindEdge(BasicBlock *BB); /// \brief Remove all blocks that can not be reached from the function's entry. /// /// Returns true if any basic block was removed. -bool removeUnreachableBlocks(Function &F); +bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr); /// \brief Combine the metadata of two instructions so that K can replace J /// diff --git a/contrib/llvm/include/llvm/module.modulemap b/contrib/llvm/include/llvm/module.modulemap index 0adce0c..d74ada6 100644 --- a/contrib/llvm/include/llvm/module.modulemap +++ b/contrib/llvm/include/llvm/module.modulemap @@ -207,6 +207,7 @@ module LLVM_Utils { textual header "Support/ELFRelocs/Sparc.def" textual header "Support/ELFRelocs/SystemZ.def" textual header "Support/ELFRelocs/x86_64.def" + textual header "Support/ELFRelocs/WebAssembly.def" } // This part of the module is usable from both C and C++ code. diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 85404d8..c3d2803 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -586,8 +586,13 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) { return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min); } -ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, - unsigned ArgIdx) { +/// Returns true if this is a writeonly (i.e Mod only) parameter. Currently, +/// we don't have a writeonly attribute, so this only knows about builtin +/// intrinsics and target library functions. We could consider adding a +/// writeonly attribute in the future and moving all of these facts to either +/// Intrinsics.td or InferFunctionAttr.cpp +static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx, + const TargetLibraryInfo &TLI) { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) switch (II->getIntrinsicID()) { default: @@ -597,9 +602,9 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, case Intrinsic::memmove: // We don't currently have a writeonly attribute. 
All other properties // of these intrinsics are nicely described via attributes in - // Intrinsics.td and handled generically below. + // Intrinsics.td and handled generically. if (ArgIdx == 0) - return MRI_Mod; + return true; } // We can bound the aliasing properties of memset_pattern16 just as we can @@ -609,7 +614,22 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, // handled via InferFunctionAttr. if (CS.getCalledFunction() && isMemsetPattern16(CS.getCalledFunction(), TLI)) if (ArgIdx == 0) - return MRI_Mod; + return true; + + // TODO: memset_pattern4, memset_pattern8 + // TODO: _chk variants + // TODO: strcmp, strcpy + + return false; +} + +ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, + unsigned ArgIdx) { + + // Emulate the missing writeonly attribute by checking for known builtin + // intrinsics and target library functions. + if (isWriteOnlyParam(CS, ArgIdx, TLI)) + return MRI_Mod; if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly)) return MRI_Ref; diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp index 07b389a..6dd1d0a 100644 --- a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp +++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp @@ -612,9 +612,10 @@ namespace { bool runOnSCC(CallGraphSCC &SCC) override { Out << Banner; for (CallGraphNode *CGN : SCC) { - if (CGN->getFunction()) - CGN->getFunction()->print(Out); - else + if (CGN->getFunction()) { + if (isFunctionInPrintList(CGN->getFunction()->getName())) + CGN->getFunction()->print(Out); + } else Out << "\nPrinting <null> Function\n"; } return false; diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp index 249f395..1babb82 100644 --- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp @@ -358,21 +358,6 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V, if (CS.isArgOperand(&U) && isFreeCall(I, &TLI)) { if (Writers) Writers->insert(CS->getParent()->getParent()); - } else if (CS.doesNotCapture(CS.getDataOperandNo(&U))) { - Function *ParentF = CS->getParent()->getParent(); - // A nocapture argument may be read from or written to, but does not - // escape unless the call can somehow recurse. - // - // nocapture "indicates that the callee does not make any copies of - // the pointer that outlive itself". Therefore if we directly or - // indirectly recurse, we must treat the pointer as escaping. - if (FunctionToSCCMap[ParentF] == - FunctionToSCCMap[CS.getCalledFunction()]) - return true; - if (Readers) - Readers->insert(ParentF); - if (Writers) - Writers->insert(ParentF); } else { return true; // Argument of an unknown call. } diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index b89ff26..6dfe625 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -70,7 +70,7 @@ static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned); static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned); static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned); -/// getFalse - For a boolean type, or a vector of boolean type, return false, or +/// For a boolean type, or a vector of boolean type, return false, or /// a vector with every element false, as appropriate for the type. 
static Constant *getFalse(Type *Ty) { assert(Ty->getScalarType()->isIntegerTy(1) && @@ -78,7 +78,7 @@ static Constant *getFalse(Type *Ty) { return Constant::getNullValue(Ty); } -/// getTrue - For a boolean type, or a vector of boolean type, return true, or +/// For a boolean type, or a vector of boolean type, return true, or /// a vector with every element true, as appropriate for the type. static Constant *getTrue(Type *Ty) { assert(Ty->getScalarType()->isIntegerTy(1) && @@ -100,7 +100,7 @@ static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, CRHS == LHS; } -/// ValueDominatesPHI - Does the given value dominate the specified phi node? +/// Does the given value dominate the specified phi node? static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { Instruction *I = dyn_cast<Instruction>(V); if (!I) @@ -131,8 +131,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { return false; } -/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning -/// it into "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is +/// Simplify "A op (B op' C)" by distributing op over op', turning it into +/// "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is /// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS. /// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". /// Returns the simplified value, or null if no simplification was performed. @@ -193,8 +193,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, return nullptr; } -/// SimplifyAssociativeBinOp - Generic simplifications for associative binary -/// operations. Returns the simpler value, or null if none was found. +/// Generic simplifications for associative binary operations. +/// Returns the simpler value, or null if none was found. static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc; @@ -290,10 +290,10 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, return nullptr; } -/// ThreadBinOpOverSelect - In the case of a binary operation with a select -/// instruction as an operand, try to simplify the binop by seeing whether -/// evaluating it on both branches of the select results in the same value. -/// Returns the common value if so, otherwise returns null. +/// In the case of a binary operation with a select instruction as an operand, +/// try to simplify the binop by seeing whether evaluating it on both branches +/// of the select results in the same value. Returns the common value if so, +/// otherwise returns null. static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. @@ -362,10 +362,9 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, return nullptr; } -/// ThreadCmpOverSelect - In the case of a comparison with a select instruction, -/// try to simplify the comparison by seeing whether both branches of the select -/// result in the same value. Returns the common value if so, otherwise returns -/// null. +/// In the case of a comparison with a select instruction, try to simplify the +/// comparison by seeing whether both branches of the select result in the same +/// value. Returns the common value if so, otherwise returns null. 
static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { @@ -444,10 +443,10 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, return nullptr; } -/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that -/// is a PHI instruction, try to simplify the binop by seeing whether evaluating -/// it on the incoming phi values yields the same result for every value. If so -/// returns the common value, otherwise returns null. +/// In the case of a binary operation with an operand that is a PHI instruction, +/// try to simplify the binop by seeing whether evaluating it on the incoming +/// phi values yields the same result for every value. If so returns the common +/// value, otherwise returns null. static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. @@ -486,10 +485,10 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, return CommonValue; } -/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction, try -/// try to simplify the comparison by seeing whether comparing with all of the -/// incoming phi values yields the same result every time. If so returns the -/// common result, otherwise returns null. +/// In the case of a comparison with a PHI instruction, try to simplify the +/// comparison by seeing whether comparing with all of the incoming phi values +/// yields the same result every time. If so returns the common result, +/// otherwise returns null. static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. @@ -524,8 +523,8 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, return CommonValue; } -/// SimplifyAddInst - Given operands for an Add, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an Add, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { @@ -656,8 +655,8 @@ static Constant *computePointerDifference(const DataLayout &DL, Value *LHS, return ConstantExpr::getSub(LHSOffset, RHSOffset); } -/// SimplifySubInst - Given operands for a Sub, see if we can -/// fold the result. If not, this returns null. +/// Given operands for a Sub, see if we can fold the result. +/// If not, this returns null. static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) @@ -889,8 +888,8 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, return nullptr; } -/// SimplifyMulInst - Given operands for a Mul, see if we can -/// fold the result. If not, this returns null. +/// Given operands for a Mul, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { @@ -989,8 +988,8 @@ Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL, RecursionLimit); } -/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can -/// fold the result. If not, this returns null. 
+/// Given operands for an SDiv or UDiv, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast<Constant>(Op0)) { @@ -1075,8 +1074,8 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, return nullptr; } -/// SimplifySDivInst - Given operands for an SDiv, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an SDiv, see if we can fold the result. +/// If not, this returns null. static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse)) @@ -1093,8 +1092,8 @@ Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL, RecursionLimit); } -/// SimplifyUDivInst - Given operands for a UDiv, see if we can -/// fold the result. If not, this returns null. +/// Given operands for a UDiv, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) @@ -1154,8 +1153,8 @@ Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, RecursionLimit); } -/// SimplifyRem - Given operands for an SRem or URem, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an SRem or URem, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast<Constant>(Op0)) { @@ -1215,8 +1214,8 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, return nullptr; } -/// SimplifySRemInst - Given operands for an SRem, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an SRem, see if we can fold the result. +/// If not, this returns null. static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse)) @@ -1233,8 +1232,8 @@ Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL, RecursionLimit); } -/// SimplifyURemInst - Given operands for a URem, see if we can -/// fold the result. If not, this returns null. +/// Given operands for a URem, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) @@ -1279,7 +1278,7 @@ Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, RecursionLimit); } -/// isUndefShift - Returns true if a shift by \c Amount always yields undef. +/// Returns true if a shift by \c Amount always yields undef. static bool isUndefShift(Value *Amount) { Constant *C = dyn_cast<Constant>(Amount); if (!C) @@ -1306,8 +1305,8 @@ static bool isUndefShift(Value *Amount) { return false; } -/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an Shl, LShr or AShr, see if we can fold the result. +/// If not, this returns null. 
static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast<Constant>(Op0)) { @@ -1375,8 +1374,8 @@ static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1, return nullptr; } -/// SimplifyShlInst - Given operands for an Shl, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an Shl, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse)) @@ -1402,8 +1401,8 @@ Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, RecursionLimit); } -/// SimplifyLShrInst - Given operands for an LShr, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an LShr, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q, @@ -1427,8 +1426,8 @@ Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, RecursionLimit); } -/// SimplifyAShrInst - Given operands for an AShr, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an AShr, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q, @@ -1502,8 +1501,8 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, return nullptr; } -// Simplify (and (icmp ...) (icmp ...)) to true when we can tell that the range -// of possible values cannot be satisfied. +/// Simplify (and (icmp ...) (icmp ...)) to true when we can tell that the range +/// of possible values cannot be satisfied. static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { ICmpInst::Predicate Pred0, Pred1; ConstantInt *CI1, *CI2; @@ -1554,8 +1553,8 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { return nullptr; } -/// SimplifyAndInst - Given operands for an And, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an And, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { @@ -1661,8 +1660,8 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL, RecursionLimit); } -// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union -// contains all possible values. +/// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union +/// contains all possible values. static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { ICmpInst::Predicate Pred0, Pred1; ConstantInt *CI1, *CI2; @@ -1713,8 +1712,8 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { return nullptr; } -/// SimplifyOrInst - Given operands for an Or, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an Or, see if we can fold the result. +/// If not, this returns null. 
static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { @@ -1849,8 +1848,8 @@ Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL, RecursionLimit); } -/// SimplifyXorInst - Given operands for a Xor, see if we can -/// fold the result. If not, this returns null. +/// Given operands for a Xor, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { @@ -1910,9 +1909,9 @@ static Type *GetCompareTy(Value *Op) { return CmpInst::makeCmpResultType(Op->getType()); } -/// ExtractEquivalentCondition - Rummage around inside V looking for something -/// equivalent to the comparison "LHS Pred RHS". Return such a value if found, -/// otherwise return null. Helper function for analyzing max/min idioms. +/// Rummage around inside V looking for something equivalent to the comparison +/// "LHS Pred RHS". Return such a value if found, otherwise return null. +/// Helper function for analyzing max/min idioms. static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { SelectInst *SI = dyn_cast<SelectInst>(V); @@ -2100,21 +2099,17 @@ static Constant *computePointerICmp(const DataLayout &DL, // that might be resolved lazily to symbols in another dynamically-loaded // library (and, thus, could be malloc'ed by the implementation). auto IsAllocDisjoint = [](SmallVectorImpl<Value *> &Objects) { - return std::all_of(Objects.begin(), Objects.end(), - [](Value *V){ - if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) - return AI->getParent() && AI->getParent()->getParent() && - AI->isStaticAlloca(); - if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) - return (GV->hasLocalLinkage() || - GV->hasHiddenVisibility() || - GV->hasProtectedVisibility() || - GV->hasUnnamedAddr()) && - !GV->isThreadLocal(); - if (const Argument *A = dyn_cast<Argument>(V)) - return A->hasByValAttr(); - return false; - }); + return std::all_of(Objects.begin(), Objects.end(), [](Value *V) { + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) + return AI->getParent() && AI->getFunction() && AI->isStaticAlloca(); + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) + return (GV->hasLocalLinkage() || GV->hasHiddenVisibility() || + GV->hasProtectedVisibility() || GV->hasUnnamedAddr()) && + !GV->isThreadLocal(); + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasByValAttr(); + return false; + }); }; if ((IsNAC(LHSUObjs) && IsAllocDisjoint(RHSUObjs)) || @@ -2127,8 +2122,8 @@ static Constant *computePointerICmp(const DataLayout &DL, return nullptr; } -/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an ICmpInst, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; @@ -3102,8 +3097,8 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, RecursionLimit); } -/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an FCmpInst, see if we can fold the result. +/// If not, this returns null.
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, FastMathFlags FMF, const Query &Q, unsigned MaxRecurse) { @@ -3227,8 +3222,7 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } -/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is -/// replaced with RepOp. +/// See if V simplifies when its operand Op is replaced with RepOp. static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, const Query &Q, unsigned MaxRecurse) { @@ -3311,8 +3305,8 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, return nullptr; } -/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold -/// the result. If not, this returns null. +/// Given operands for a SelectInst, see if we can fold the result. +/// If not, this returns null. static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, const Query &Q, unsigned MaxRecurse) { @@ -3449,8 +3443,8 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } -/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an GetElementPtrInst, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, const Query &Q, unsigned) { // The type of the GEP pointer operand. @@ -3542,8 +3536,8 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL, Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } -/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we -/// can fold the result. If not, this returns null. +/// Given operands for an InsertValueInst, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const Query &Q, unsigned) { @@ -3579,8 +3573,8 @@ Value *llvm::SimplifyInsertValueInst( RecursionLimit); } -/// SimplifyExtractValueInst - Given operands for an ExtractValueInst, see if we -/// can fold the result. If not, this returns null. +/// Given operands for an ExtractValueInst, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, const Query &, unsigned) { if (auto *CAgg = dyn_cast<Constant>(Agg)) @@ -3614,8 +3608,8 @@ Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, RecursionLimit); } -/// SimplifyExtractElementInst - Given operands for an ExtractElementInst, see if we -/// can fold the result. If not, this returns null. +/// Given operands for an ExtractElementInst, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &, unsigned) { if (auto *CVec = dyn_cast<Constant>(Vec)) { @@ -3646,7 +3640,7 @@ Value *llvm::SimplifyExtractElementInst( RecursionLimit); } -/// SimplifyPHINode - See if we can fold the given phi. If not, returns null. +/// See if we can fold the given phi. If not, returns null. static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // If all of the PHI's incoming values are the same then replace the PHI node // with the common value. 
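Editorial aside: the threading helpers whose comments are modernized above all share one shape: evaluate the operation on each arm of a select (or each incoming value of a phi) and keep the result only if every arm simplifies to the same value. A toy integer-valued model of that shape, not the real LLVM API:

#include <functional>
#include <optional>

// Toy ThreadBinOpOverSelect: for "select(C, T, F) op RHS", try the op on
// both arms; if both fold to the same value, the select is irrelevant
// and that value is the simplification.
std::optional<int>
threadOverSelect(const std::function<std::optional<int>(int, int)> &Simplify,
                 int TrueArm, int FalseArm, int RHS) {
  auto T = Simplify(TrueArm, RHS);
  auto F = Simplify(FalseArm, RHS);
  if (T && F && *T == *F)
    return T; // both branches agree on a value
  return std::nullopt; // no common value; give up
}

The phi variants in the hunks above generalize this from two arms to all incoming values, bailing out early once the recursion limit is hit.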
@@ -3696,8 +3690,8 @@ Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL, //=== Helper functions for higher up the class hierarchy. -/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can -/// fold the result. If not, this returns null. +/// Given operands for a BinaryOperator, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { switch (Opcode) { @@ -3763,8 +3757,8 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, } } -/// SimplifyFPBinOp - Given operands for a BinaryOperator, see if we can -/// fold the result. If not, this returns null. +/// Given operands for a BinaryOperator, see if we can fold the result. +/// If not, this returns null. /// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the /// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, @@ -3799,8 +3793,7 @@ Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, RecursionLimit); } -/// SimplifyCmpInst - Given operands for a CmpInst, see if we can -/// fold the result. +/// Given operands for a CmpInst, see if we can fold the result. static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) @@ -3938,8 +3931,8 @@ Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } -/// SimplifyInstruction - See if we can compute a simplified version of this -/// instruction. If not, this returns null. +/// See if we can compute a simplified version of this instruction. +/// If not, this returns null. Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp index d7896ad..8bcdcb8 100644 --- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -845,6 +845,7 @@ int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, if (Lp != AR->getLoop()) { DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " << *Ptr << " SCEV: " << *PtrScev << "\n"); + return 0; } // The address calculation must not wrap. Otherwise, a dependence could be diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp index 9ab9eea..0c725fc 100644 --- a/contrib/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -637,8 +637,10 @@ LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) { analyze(DomTree); } -void LoopInfo::updateUnloop(Loop *Unloop) { - Unloop->markUnlooped(); +void LoopInfo::markAsRemoved(Loop *Unloop) { + assert(!Unloop->isInvalid() && "Loop has already been removed"); + Unloop->invalidate(); + RemovedLoops.push_back(Unloop); // First handle the special case of no parent loop to simplify the algorithm. 
if (!Unloop->getParentLoop()) { diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp index dc42473..8163231 100644 --- a/contrib/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -42,7 +42,11 @@ public: } bool runOnLoop(Loop *L, LPPassManager &) override { - P.run(*L); + auto BBI = find_if(L->blocks().begin(), L->blocks().end(), + [](BasicBlock *BB) { return BB; }); + if (BBI != L->blocks().end() && + isFunctionInPrintList((*BBI)->getParent()->getName())) + P.run(*L); return false; } }; @@ -174,8 +178,9 @@ bool LPPassManager::runOnFunction(Function &F) { // Walk Loops while (!LQ.empty()) { - + bool LoopWasDeleted = false; CurrentLoop = LQ.back(); + // Run all passes on the current Loop. for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); @@ -192,15 +197,15 @@ bool LPPassManager::runOnFunction(Function &F) { Changed |= P->runOnLoop(CurrentLoop, *this); } + LoopWasDeleted = CurrentLoop->isInvalid(); if (Changed) dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, - CurrentLoop->isUnloop() - ? "<deleted>" - : CurrentLoop->getHeader()->getName()); + LoopWasDeleted ? "<deleted>" + : CurrentLoop->getHeader()->getName()); dumpPreservedSet(P); - if (CurrentLoop->isUnloop()) { + if (LoopWasDeleted) { // Notify passes that the loop is being deleted. deleteSimpleAnalysisLoop(CurrentLoop); } else { @@ -222,12 +227,11 @@ bool LPPassManager::runOnFunction(Function &F) { removeNotPreservedAnalysis(P); recordAvailableAnalysis(P); - removeDeadPasses(P, CurrentLoop->isUnloop() - ? "<deleted>" - : CurrentLoop->getHeader()->getName(), + removeDeadPasses(P, LoopWasDeleted ? "<deleted>" + : CurrentLoop->getHeader()->getName(), ON_LOOP_MSG); - if (CurrentLoop->isUnloop()) + if (LoopWasDeleted) // Do not run other passes on this loop. break; } @@ -235,12 +239,11 @@ bool LPPassManager::runOnFunction(Function &F) { // If the loop was deleted, release all the loop passes. This frees up // some memory, and avoids trouble with the pass manager trying to call // verifyAnalysis on them. - if (CurrentLoop->isUnloop()) { + if (LoopWasDeleted) { for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { Pass *P = getContainedPass(Index); freePass(P, "<deleted>", ON_LOOP_MSG); } - delete CurrentLoop; } // Pop the loop from queue after running all passes. diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp index 029997a..486f3a5 100644 --- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp +++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp @@ -26,7 +26,7 @@ // ... = load %ptr2, !alias.scope !{ !scope1, !scope2 }, !noalias !{ !scope1 } // // When evaluating an aliasing query, if one of the instructions is associated -// has a set of noalias scopes in some domain that is superset of the alias +// has a set of noalias scopes in some domain that is a superset of the alias // scopes in that domain of some other instruction, then the two memory // accesses are assumed not to alias. // diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 805f3ef..9f92391 100644 --- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -70,7 +70,7 @@ // A a; // } B; // -// For an acess to B.a.s, we attach !5 (a path tag node) to the load/store +// For an access to B.a.s, we attach !5 (a path tag node) to the load/store // instruction. 
The base type is !4 (struct B), the access type is !2 (scalar // type short) and the offset is 4. // diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index abc57ed..a83e207 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -2556,6 +2556,9 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) { switch (I->getOpcode()) { default: break; + // Unsigned integers are always nonnegative. + case Instruction::UIToFP: + return true; case Instruction::FMul: // x*x is always non-negative or a NaN. if (I->getOperand(0) == I->getOperand(1)) @@ -2566,6 +2569,9 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) { case Instruction::FRem: return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) && CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1); + case Instruction::Select: + return CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1) && + CannotBeOrderedLessThanZero(I->getOperand(2), Depth+1); case Instruction::FPExt: case Instruction::FPTrunc: // Widening/narrowing never change sign. @@ -2574,6 +2580,12 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) switch (II->getIntrinsicID()) { default: break; + case Intrinsic::maxnum: + return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) || + CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1); + case Intrinsic::minnum: + return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) && + CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1); case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::fabs: diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index c7606fd..2ad4b32 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2654,8 +2654,6 @@ std::error_code BitcodeReader::parseConstants() { return error("Invalid record"); Type *EltTy = cast<SequentialType>(CurTy)->getElementType(); - unsigned Size = Record.size(); - if (EltTy->isIntegerTy(8)) { SmallVector<uint8_t, 16> Elts(Record.begin(), Record.end()); if (isa<VectorType>(CurTy)) @@ -2680,21 +2678,24 @@ std::error_code BitcodeReader::parseConstants() { V = ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); + } else if (EltTy->isHalfTy()) { + SmallVector<uint16_t, 16> Elts(Record.begin(), Record.end()); + if (isa<VectorType>(CurTy)) + V = ConstantDataVector::getFP(Context, Elts); + else + V = ConstantDataArray::getFP(Context, Elts); } else if (EltTy->isFloatTy()) { - SmallVector<float, 16> Elts(Size); - std::transform(Record.begin(), Record.end(), Elts.begin(), BitsToFloat); + SmallVector<uint32_t, 16> Elts(Record.begin(), Record.end()); if (isa<VectorType>(CurTy)) - V = ConstantDataVector::get(Context, Elts); + V = ConstantDataVector::getFP(Context, Elts); else - V = ConstantDataArray::get(Context, Elts); + V = ConstantDataArray::getFP(Context, Elts); } else if (EltTy->isDoubleTy()) { - SmallVector<double, 16> Elts(Size); - std::transform(Record.begin(), Record.end(), Elts.begin(), - BitsToDouble); + SmallVector<uint64_t, 16> Elts(Record.begin(), Record.end()); if (isa<VectorType>(CurTy)) - V = ConstantDataVector::get(Context, Elts); + V = ConstantDataVector::getFP(Context, Elts); else - V = ConstantDataArray::get(Context, Elts); + V = ConstantDataArray::getFP(Context, Elts); 
} else { return error("Invalid type for value"); } diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index a1f8786..a899a0c 100644 --- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1630,19 +1630,10 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, if (isa<IntegerType>(EltTy)) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) Record.push_back(CDS->getElementAsInteger(i)); - } else if (EltTy->isFloatTy()) { - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - union { float F; uint32_t I; }; - F = CDS->getElementAsFloat(i); - Record.push_back(I); - } } else { - assert(EltTy->isDoubleTy() && "Unknown ConstantData element type"); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - union { double F; uint64_t I; }; - F = CDS->getElementAsDouble(i); - Record.push_back(I); - } + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) + Record.push_back( + CDS->getElementAsAPFloat(i).bitcastToAPInt().getLimitedValue()); } } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) || isa<ConstantVector>(C)) { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index be7eafb..5f67d3d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -192,22 +192,26 @@ bool AsmPrinter::doInitialization(Module &M) { // use the directive, where it would need the same conditionalization // anyway. Triple TT(getTargetTriple()); - if (TT.isOSDarwin()) { + // If there is a version specified, Major will be non-zero. + if (TT.isOSDarwin() && TT.getOSMajorVersion() != 0) { unsigned Major, Minor, Update; - TT.getOSVersion(Major, Minor, Update); - // If there is a version specified, Major will be non-zero. - if (Major) { - MCVersionMinType VersionType; - if (TT.isWatchOS()) - VersionType = MCVM_WatchOSVersionMin; - else if (TT.isTvOS()) - VersionType = MCVM_TvOSVersionMin; - else if (TT.isMacOSX()) - VersionType = MCVM_OSXVersionMin; - else - VersionType = MCVM_IOSVersionMin; - OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update); + MCVersionMinType VersionType; + if (TT.isWatchOS()) { + VersionType = MCVM_WatchOSVersionMin; + TT.getWatchOSVersion(Major, Minor, Update); + } else if (TT.isTvOS()) { + VersionType = MCVM_TvOSVersionMin; + TT.getiOSVersion(Major, Minor, Update); + } else if (TT.isMacOSX()) { + VersionType = MCVM_OSXVersionMin; + if (!TT.getMacOSXVersion(Major, Minor, Update)) + Major = 0; + } else { + VersionType = MCVM_IOSVersionMin; + TT.getiOSVersion(Major, Minor, Update); } + if (Major != 0) + OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update); } // Allow the target to emit any magic that it wants at the start of the file. 
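The writer change above replaces per-type union punning with a single path that records each element's raw IEEE-754 bit pattern. A minimal standalone equivalent of that bit extraction, using memcpy (the well-defined way to do what the removed unions did); APFloat::bitcastToAPInt generalizes the same idea to half floats. The helper names here are illustrative:

    #include <cstdint>
    #include <cstring>

    // Reinterpret the bytes of an IEEE-754 value as an unsigned integer
    // of the same width, without undefined type punning.
    static uint32_t floatBits(float F) {
      uint32_t I;
      std::memcpy(&I, &F, sizeof I);
      return I;
    }

    static uint64_t doubleBits(double D) {
      uint64_t I;
      std::memcpy(&I, &D, sizeof I);
      return I;
    }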
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index bf794f7..7b0cdbd 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -32,6 +32,39 @@ using namespace llvm; //===----------------------------------------------------------------------===// +// EmittingAsmStreamer Implementation +//===----------------------------------------------------------------------===// +unsigned EmittingAsmStreamer::emitULEB128(uint64_t Value, const char *Desc, + unsigned PadTo) { + AP->EmitULEB128(Value, Desc, PadTo); + return 0; +} + +unsigned EmittingAsmStreamer::emitInt8(unsigned char Value) { + AP->EmitInt8(Value); + return 0; +} + +unsigned EmittingAsmStreamer::emitBytes(StringRef Data) { + AP->OutStreamer->EmitBytes(Data); + return 0; +} + +//===----------------------------------------------------------------------===// +// SizeReporterAsmStreamer Implementation +//===----------------------------------------------------------------------===// +unsigned SizeReporterAsmStreamer::emitULEB128(uint64_t Value, const char *Desc, + unsigned PadTo) { + return getULEB128Size(Value); +} + +unsigned SizeReporterAsmStreamer::emitInt8(unsigned char Value) { return 1; } + +unsigned SizeReporterAsmStreamer::emitBytes(StringRef Data) { + return Data.size(); +} + +//===----------------------------------------------------------------------===// // DIEAbbrevData Implementation //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1f0c06f..a4fb07e 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -561,6 +561,8 @@ void DwarfDebug::finalizeModuleInfo() { // Collect info for variables that were optimized out. collectDeadVariables(); + unsigned MacroOffset = 0; + std::unique_ptr<AsmStreamerBase> AS(new SizeReporterAsmStreamer(Asm)); // Handle anything that needs to be done on a per-unit basis after // all other generation. for (const auto &P : CUMap) { @@ -613,6 +615,15 @@ void DwarfDebug::finalizeModuleInfo() { U.setBaseAddress(TheCU.getRanges().front().getStart()); U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges()); } + + auto *CUNode = cast<DICompileUnit>(P.first); + if (CUNode->getMacros()) { + // Compile Unit has macros, emit "DW_AT_macro_info" attribute. + U.addUInt(U.getUnitDie(), dwarf::DW_AT_macro_info, + dwarf::DW_FORM_sec_offset, MacroOffset); + // Update macro section offset + MacroOffset += handleMacroNodes(AS.get(), CUNode->getMacros(), U); + } } // Compute DIE offsets and sizes. @@ -656,6 +667,9 @@ void DwarfDebug::endModule() { // Emit info into a debug ranges section. emitDebugRanges(); + // Emit info into a debug macinfo section. 
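EmittingAsmStreamer and SizeReporterAsmStreamer are two implementations of one interface: the first writes through the AsmPrinter, the second only reports how many bytes each call would produce. Running the size-reporting one first is what lets DwarfDebug compute macro section offsets before anything is emitted. A minimal sketch of the pattern, with hypothetical names rather than the LLVM classes:

    #include <string>

    struct StreamerBase {
      virtual ~StreamerBase() = default;
      virtual unsigned emitInt8(unsigned char Value) = 0;
      virtual unsigned emitBytes(const std::string &Data) = 0;
    };

    // Measures instead of writing; each method returns the byte count the
    // real emitter would have produced for the same call.
    struct SizeReporter : StreamerBase {
      unsigned emitInt8(unsigned char) override { return 1; }
      unsigned emitBytes(const std::string &Data) override {
        return static_cast<unsigned>(Data.size());
      }
    };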
+ emitDebugMacinfo(); + if (useSplitDwarf()) { emitDebugStrDWO(); emitDebugInfoDWO(); @@ -1833,6 +1847,70 @@ void DwarfDebug::emitDebugRanges() { } } +unsigned DwarfDebug::handleMacroNodes(AsmStreamerBase *AS, + DIMacroNodeArray Nodes, + DwarfCompileUnit &U) { + unsigned Size = 0; + for (auto *MN : Nodes) { + if (auto *M = dyn_cast<DIMacro>(MN)) + Size += emitMacro(AS, *M); + else if (auto *F = dyn_cast<DIMacroFile>(MN)) + Size += emitMacroFile(AS, *F, U); + else + llvm_unreachable("Unexpected DI type!"); + } + return Size; +} + +unsigned DwarfDebug::emitMacro(AsmStreamerBase *AS, DIMacro &M) { + int Size = 0; + Size += AS->emitULEB128(M.getMacinfoType()); + Size += AS->emitULEB128(M.getLine()); + StringRef Name = M.getName(); + StringRef Value = M.getValue(); + Size += AS->emitBytes(Name); + if (!Value.empty()) { + // There should be one space between macro name and macro value. + Size += AS->emitInt8(' '); + Size += AS->emitBytes(Value); + } + Size += AS->emitInt8('\0'); + return Size; +} + +unsigned DwarfDebug::emitMacroFile(AsmStreamerBase *AS, DIMacroFile &F, + DwarfCompileUnit &U) { + int Size = 0; + assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file); + Size += AS->emitULEB128(dwarf::DW_MACINFO_start_file); + Size += AS->emitULEB128(F.getLine()); + DIFile *File = F.getFile(); + unsigned FID = + U.getOrCreateSourceID(File->getFilename(), File->getDirectory()); + Size += AS->emitULEB128(FID); + Size += handleMacroNodes(AS, F.getElements(), U); + Size += AS->emitULEB128(dwarf::DW_MACINFO_end_file); + return Size; +} + +// Emit visible names into a debug macinfo section. +void DwarfDebug::emitDebugMacinfo() { + if (MCSection *Macinfo = Asm->getObjFileLowering().getDwarfMacinfoSection()) { + // Start the dwarf macinfo section. + Asm->OutStreamer->SwitchSection(Macinfo); + } + std::unique_ptr<AsmStreamerBase> AS(new EmittingAsmStreamer(Asm)); + for (const auto &P : CUMap) { + auto &TheCU = *P.second; + auto *SkCU = TheCU.getSkeleton(); + DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; + auto *CUNode = cast<DICompileUnit>(P.first); + handleMacroNodes(AS.get(), CUNode->getMacros(), U); + } + Asm->OutStreamer->AddComment("End Of Macro List Mark"); + Asm->EmitInt8(0); +} + // DWARF5 Experimental Separate Dwarf emitters. void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 4c613a9..460c186 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -400,18 +400,26 @@ class DwarfDebug : public AsmPrinterHandler { /// Emit visible names into a debug str section. void emitDebugStr(); - /// Emit visible names into a debug loc section. + /// Emit variable locations into a debug loc section. void emitDebugLoc(); - /// Emit visible names into a debug loc dwo section. + /// Emit variable locations into a debug loc dwo section. void emitDebugLocDWO(); - /// Emit visible names into a debug aranges section. + /// Emit address ranges into a debug aranges section. void emitDebugARanges(); - /// Emit visible names into a debug ranges section. + /// Emit address ranges into a debug ranges section. void emitDebugRanges(); + /// Emit macros into a debug macinfo section. 
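The numeric fields in these macinfo records are ULEB128-encoded, which is why the size pass and the emit pass can agree byte-for-byte. A self-contained encoder showing the format; passing nullptr only measures, mirroring the two-streamer split:

    #include <cstdint>
    #include <vector>

    static unsigned encodeULEB128(uint64_t Value, std::vector<uint8_t> *Out) {
      unsigned Count = 0;
      do {
        uint8_t Byte = Value & 0x7f; // low 7 bits per byte
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // high bit set: more bytes follow
        if (Out)
          Out->push_back(Byte);
        ++Count;
      } while (Value != 0);
      return Count;
    }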
+ void emitDebugMacinfo(); + unsigned emitMacro(AsmStreamerBase *AS, DIMacro &M); + unsigned emitMacroFile(AsmStreamerBase *AS, DIMacroFile &F, + DwarfCompileUnit &U); + unsigned handleMacroNodes(AsmStreamerBase *AS, DIMacroNodeArray Nodes, + DwarfCompileUnit &U); + /// DWARF 5 Experimental Split Dwarf Emitters /// Initialize common features of skeleton units. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index c2c0f84..1e2f55b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -82,13 +82,24 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL, const MDNode *Scope = DL.getScope(); if (!Scope) return; + unsigned LineNumber = DL.getLine(); + // Skip this line if it is longer than the maximum we can record. + if (LineNumber > COFF::CVL_MaxLineNumber) + return; + + unsigned ColumnNumber = DL.getCol(); + // Truncate the column number if it is longer than the maximum we can record. + if (ColumnNumber > COFF::CVL_MaxColumnNumber) + ColumnNumber = 0; + StringRef Filename = getFullFilepath(Scope); // Skip this instruction if it has the same file:line as the previous one. assert(CurFn); if (!CurFn->Instrs.empty()) { const InstrInfoTy &LastInstr = InstrInfo[CurFn->Instrs.back()]; - if (LastInstr.Filename == Filename && LastInstr.LineNumber == DL.getLine()) + if (LastInstr.Filename == Filename && LastInstr.LineNumber == LineNumber && + LastInstr.ColumnNumber == ColumnNumber) return; } FileNameRegistry.add(Filename); @@ -96,7 +107,7 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL, MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol(); Asm->OutStreamer->EmitLabel(MCL); CurFn->Instrs.push_back(MCL); - InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine(), DL.getCol()); + InstrInfo[MCL] = InstrInfoTy(Filename, LineNumber, ColumnNumber); } WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP) @@ -282,8 +293,9 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart]; ColSegI != ColSegEnd; ++ColSegI) { unsigned ColumnNumber = InstrInfo[FI.Instrs[ColSegI]].ColumnNumber; + assert(ColumnNumber <= COFF::CVL_MaxColumnNumber); Asm->EmitInt16(ColumnNumber); // Start column - Asm->EmitInt16(ColumnNumber); // End column + Asm->EmitInt16(0); // End column } Asm->OutStreamer->EmitLabel(FileSegmentEnd); }; @@ -320,7 +332,10 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { // The first PC with the given linenumber and the linenumber itself. 
EmitLabelDiff(*Asm->OutStreamer, Fn, Instr); - Asm->EmitInt32(InstrInfo[Instr].LineNumber); + uint32_t LineNumber = InstrInfo[Instr].LineNumber; + assert(LineNumber <= COFF::CVL_MaxLineNumber); + uint32_t LineData = LineNumber | COFF::CVL_IsStatement; + Asm->EmitInt32(LineData); } FinishPreviousChunk(); diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index 604feed..df5cac5 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -744,18 +744,6 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, return true; } -static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) { - auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end(); - auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end(); - if ((E1 - I1) != (E2 - I2)) - return false; - for (; I1 != E1; ++I1, ++I2) { - if (**I1 != **I2) - return false; - } - return true; -} - static void removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, MachineBasicBlock &MBBCommon) { @@ -792,8 +780,7 @@ removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!"); if (MBBICommon->mayLoad() || MBBICommon->mayStore()) - if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon)) - MBBICommon->dropMemRefs(); + MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI)); ++MBBI; ++MBBICommon; diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index 6fbdea8..03e5778 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1108,7 +1108,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, // <16 x i1> %mask, <16 x i32> %passthru) // to a chain of basic blocks, with loading element one-by-one if // the appropriate mask bit is set -// +// // %1 = bitcast i8* %addr to i32* // %2 = extractelement <16 x i1> %mask, i32 0 // %3 = icmp eq i1 %2, true @@ -1272,12 +1272,12 @@ static void ScalarizeMaskedLoad(CallInst *CI) { // %5 = getelementptr i32* %1, i32 0 // store i32 %4, i32* %5 // br label %else -// +// // else: ; preds = %0, %cond.store // %6 = extractelement <16 x i1> %mask, i32 1 // %7 = icmp eq i1 %6, true // br i1 %7, label %cond.store1, label %else2 -// +// // cond.store1: ; preds = %else // %8 = extractelement <16 x i32> %val, i32 1 // %9 = getelementptr i32* %1, i32 1 @@ -1377,24 +1377,24 @@ static void ScalarizeMaskedStore(CallInst *CI) { // <16 x i1> %Mask, <16 x i32> %Src) // to a chain of basic blocks, with loading element one-by-one if // the appropriate mask bit is set -// +// // % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind // % Mask0 = extractelement <16 x i1> %Mask, i32 0 // % ToLoad0 = icmp eq i1 % Mask0, true // br i1 % ToLoad0, label %cond.load, label %else -// +// // cond.load: // % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 // % Load0 = load i32, i32* % Ptr0, align 4 // % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0 // br label %else -// +// // else: // %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0] // % Mask1 = extractelement <16 x i1> %Mask, i32 1 // % ToLoad1 = icmp eq i1 % Mask1, true // br i1 % ToLoad1, label %cond.load1, label %else2 -// +// // cond.load1: // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 // % Load1 = load i32, i32* % Ptr1, align 4 @@ -1526,7 +1526,7 @@ static void ScalarizeMaskedGather(CallInst *CI) 
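The CodeView fixes above follow one policy: an unencodable line number means the location is skipped entirely, an unencodable column is truncated to 0 ("unknown"), and the emitted line word carries an is-statement flag in its top bit. A standalone sketch of that policy; the field widths and flag position below are illustrative assumptions (the real constants are COFF::CVL_MaxLineNumber, COFF::CVL_MaxColumnNumber, and COFF::CVL_IsStatement):

    #include <cstdint>

    constexpr uint32_t MaxLine = 0xFFFFFF;     // assumed 24-bit line field
    constexpr uint32_t MaxColumn = 0xFFFF;     // assumed 16-bit column field
    constexpr uint32_t IsStatement = 1u << 31; // assumed flag bit

    // Returns false when the location cannot be recorded at all; truncates
    // an oversized column rather than emitting a wrong one.
    static bool normalize(uint32_t &Line, uint32_t &Column) {
      if (Line > MaxLine)
        return false;
      if (Column > MaxColumn)
        Column = 0;
      return true;
    }

    static uint32_t packLineData(uint32_t Line) {
      return Line | IsStatement; // low bits: line, top bit: statement flag
    }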
{ // % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 // store i32 %Elt0, i32* % Ptr0, align 4 // br label %else -// +// // else: // % Mask1 = extractelement <16 x i1> % Mask, i32 1 // % ToStore1 = icmp eq i1 % Mask1, true diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp index 98d30b9..b9937e5 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -19,6 +19,8 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -30,7 +32,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include <deque> +#include <queue> #include <list> using namespace llvm; @@ -76,16 +78,13 @@ private: typedef std::list<VarLoc> VarLocList; typedef SmallDenseMap<const MachineBasicBlock *, VarLocList> VarLocInMBB; - bool OLChanged; // OutgoingLocs got changed for this bb. - bool MBBJoined; // The MBB was joined. - void transferDebugValue(MachineInstr &MI, VarLocList &OpenRanges); void transferRegisterDef(MachineInstr &MI, VarLocList &OpenRanges); - void transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges, + bool transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs); - void transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs); + bool transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs); - void join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs); + bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs); bool ExtendRanges(MachineFunction &MF); @@ -225,24 +224,18 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI, } /// Terminate all open ranges at the end of the current basic block. -void LiveDebugValues::transferTerminatorInst(MachineInstr &MI, +bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs) { + bool Changed = false; const MachineBasicBlock *CurMBB = MI.getParent(); if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back()))) - return; + return false; if (OpenRanges.empty()) - return; + return false; - if (OutLocs.find(CurMBB) == OutLocs.end()) { - // Create space for new Outgoing locs entries. - VarLocList VLL; - OutLocs.insert(std::make_pair(CurMBB, std::move(VLL))); - } - auto OL = OutLocs.find(CurMBB); - assert(OL != OutLocs.end()); - VarLocList &VLL = OL->second; + VarLocList &VLL = OutLocs[CurMBB]; for (auto OR : OpenRanges) { // Copy OpenRanges to OutLocs, if not already present. @@ -251,28 +244,30 @@ void LiveDebugValues::transferTerminatorInst(MachineInstr &MI, if (std::find_if(VLL.begin(), VLL.end(), [&](const VarLoc &V) { return (OR == V); }) == VLL.end()) { VLL.push_back(std::move(OR)); - OLChanged = true; + Changed = true; } } OpenRanges.clear(); + return Changed; } /// This routine creates OpenRanges and OutLocs. 
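The OutLocs simplification above leans on a property LLVM's map types share with std::map: operator[] default-constructs a missing entry, so the explicit find/insert dance the old code performed is unnecessary. In standard C++ terms:

    #include <map>
    #include <vector>

    std::map<int, std::vector<int>> OutLocs;

    void recordLoc(int BB, int Loc) {
      // Creates an empty vector on first access to this key.
      std::vector<int> &VLL = OutLocs[BB];
      VLL.push_back(Loc);
    }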
-void LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges, +bool LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs) { + bool Changed = false; transferDebugValue(MI, OpenRanges); transferRegisterDef(MI, OpenRanges); - transferTerminatorInst(MI, OpenRanges, OutLocs); + Changed = transferTerminatorInst(MI, OpenRanges, OutLocs); + return Changed; } /// This routine joins the analysis results of all incoming edges in @MBB by /// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same /// source variable in all the predecessors of @MBB reside in the same location. -void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, +bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs) { DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n"); - - MBBJoined = false; + bool Changed = false; VarLocList InLocsT; // Temporary incoming locations. @@ -282,7 +277,7 @@ void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, auto OL = OutLocs.find(p); // Join is null in case of empty OutLocs from any of the pred. if (OL == OutLocs.end()) - return; + return false; // Just copy over the Out locs to incoming locs for the first predecessor. if (p == *MBB.pred_begin()) { @@ -292,27 +287,18 @@ void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, // Join with this predecessor. VarLocList &VLL = OL->second; - InLocsT.erase(std::remove_if(InLocsT.begin(), InLocsT.end(), - [&](VarLoc &ILT) { - return (std::find_if(VLL.begin(), VLL.end(), - [&](const VarLoc &V) { - return (ILT == V); - }) == VLL.end()); - }), - InLocsT.end()); + InLocsT.erase( + std::remove_if(InLocsT.begin(), InLocsT.end(), [&](VarLoc &ILT) { + return (std::find_if(VLL.begin(), VLL.end(), [&](const VarLoc &V) { + return (ILT == V); + }) == VLL.end()); + }), InLocsT.end()); } if (InLocsT.empty()) - return; + return false; - if (InLocs.find(&MBB) == InLocs.end()) { - // Create space for new Incoming locs entries. - VarLocList VLL; - InLocs.insert(std::make_pair(&MBB, std::move(VLL))); - } - auto IL = InLocs.find(&MBB); - assert(IL != InLocs.end()); - VarLocList &ILL = IL->second; + VarLocList &ILL = InLocs[&MBB]; // Insert DBG_VALUE instructions, if not already inserted. for (auto ILT : InLocsT) { @@ -331,12 +317,13 @@ void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, MI->getOperand(1).setImm(DMI->getOperand(1).getImm()); DEBUG(dbgs() << "Inserted: "; MI->dump();); ++NumInserted; - MBBJoined = true; // rerun transfer(). + Changed = true; VarLoc V(ILT.Var, MI); ILL.push_back(std::move(V)); } } + return Changed; } /// Calculate the liveness information for the given machine function and @@ -346,48 +333,72 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { DEBUG(dbgs() << "\nDebug Range Extension\n"); bool Changed = false; - OLChanged = MBBJoined = false; + bool OLChanged = false; + bool MBBJoined = false; VarLocList OpenRanges; // Ranges that are open until end of bb. VarLocInMBB OutLocs; // Ranges that exist beyond bb. VarLocInMBB InLocs; // Ranges that are incoming after joining. 
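The join above computes a set intersection across predecessors with the erase/remove_if idiom: after seeding from the first predecessor, each later predecessor's out-set filters the candidates down to the locations everyone agrees on. The same idiom in isolation, over plain ints:

    #include <algorithm>
    #include <vector>

    // Keep only the entries of InLocs that also appear in PredOut.
    void intersectInPlace(std::vector<int> &InLocs,
                          const std::vector<int> &PredOut) {
      InLocs.erase(std::remove_if(InLocs.begin(), InLocs.end(),
                                  [&](int Loc) {
                                    return std::find(PredOut.begin(),
                                                     PredOut.end(),
                                                     Loc) == PredOut.end();
                                  }),
                   InLocs.end());
    }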
- std::deque<MachineBasicBlock *> BBWorklist; - + DenseMap<unsigned int, MachineBasicBlock *> OrderToBB; + DenseMap<MachineBasicBlock *, unsigned int> BBToOrder; + std::priority_queue<unsigned int, std::vector<unsigned int>, + std::greater<unsigned int>> Worklist; + std::priority_queue<unsigned int, std::vector<unsigned int>, + std::greater<unsigned int>> Pending; // Initialize every mbb with OutLocs. for (auto &MBB : MF) for (auto &MI : MBB) transfer(MI, OpenRanges, OutLocs); DEBUG(printVarLocInMBB(OutLocs, "OutLocs after initialization", dbgs())); - // Construct a worklist of MBBs. - for (auto &MBB : MF) - BBWorklist.push_back(&MBB); - - // Perform join() and transfer() using the worklist until the ranges converge - // Ranges have converged when the worklist is empty. - while (!BBWorklist.empty()) { - MachineBasicBlock *MBB = BBWorklist.front(); - BBWorklist.pop_front(); - - join(*MBB, OutLocs, InLocs); + ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); + unsigned int RPONumber = 0; + for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) { + OrderToBB[RPONumber] = *RI; + BBToOrder[*RI] = RPONumber; + Worklist.push(RPONumber); + ++RPONumber; + } - if (MBBJoined) { - Changed = true; - for (auto &MI : *MBB) - transfer(MI, OpenRanges, OutLocs); - DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs())); - DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs())); - - if (OLChanged) { - OLChanged = false; - for (auto s : MBB->successors()) - if (std::find(BBWorklist.begin(), BBWorklist.end(), s) == - BBWorklist.end()) // add if not already present. - BBWorklist.push_back(s); + // This is a standard "union of predecessor outs" dataflow problem. + // To solve it, we perform join() and transfer() using the two worklist method + // until the ranges converge. + // Ranges have converged when both worklists are empty. + while (!Worklist.empty() || !Pending.empty()) { + // We track what is on the pending worklist to avoid inserting the same + // thing twice. We could avoid this with a custom priority queue, but this + // is probably not worth it. + SmallPtrSet<MachineBasicBlock *, 16> OnPending; + while (!Worklist.empty()) { + MachineBasicBlock *MBB = OrderToBB[Worklist.top()]; + Worklist.pop(); + MBBJoined = join(*MBB, OutLocs, InLocs); + + if (MBBJoined) { + MBBJoined = false; + Changed = true; + for (auto &MI : *MBB) + OLChanged |= transfer(MI, OpenRanges, OutLocs); + DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs())); + DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs())); + + if (OLChanged) { + OLChanged = false; + for (auto s : MBB->successors()) + if (!OnPending.count(s)) { + OnPending.insert(s); + Pending.push(BBToOrder[s]); + } + } } } + Worklist.swap(Pending); + // At this point, pending must be empty, since it was just the empty + // worklist + assert(Pending.empty() && "Pending should be empty"); } + DEBUG(printVarLocInMBB(OutLocs, "Final OutLocs", dbgs())); DEBUG(printVarLocInMBB(InLocs, "Final InLocs", dbgs())); return Changed; diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index efad36f..bb34883 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -1328,15 +1328,15 @@ void LiveRangeUpdater::flush() { LR->verify(); } -unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { +unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) { // Create initial equivalence classes. 
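The rewrite replaces the ad-hoc deque with the classic two-worklist scheme: blocks are keyed by reverse-post-order number so joins see up-to-date predecessor state, and successors whose inputs changed are parked on a Pending queue that becomes the next pass's worklist. A self-contained skeleton of that driver, under the assumption that block indices already are RPO numbers:

    #include <functional>
    #include <queue>
    #include <set>
    #include <vector>

    using MinHeap = std::priority_queue<unsigned, std::vector<unsigned>,
                                        std::greater<unsigned>>;

    // JoinAndTransfer returns true when the block's out-state changed.
    void solveDataflow(const std::vector<std::vector<unsigned>> &Succs,
                       const std::function<bool(unsigned)> &JoinAndTransfer) {
      MinHeap Worklist, Pending;
      for (unsigned BB = 0, E = static_cast<unsigned>(Succs.size()); BB != E;
           ++BB)
        Worklist.push(BB);
      while (!Worklist.empty() || !Pending.empty()) {
        // Track what is already pending to avoid duplicate queue entries.
        std::set<unsigned> OnPending;
        while (!Worklist.empty()) {
          unsigned BB = Worklist.top();
          Worklist.pop();
          if (JoinAndTransfer(BB))
            for (unsigned Succ : Succs[BB])
              if (OnPending.insert(Succ).second)
                Pending.push(Succ);
        }
        Worklist.swap(Pending); // drained, so Pending drives the next pass
      }
    }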
EqClass.clear(); - EqClass.grow(LI->getNumValNums()); + EqClass.grow(LR.getNumValNums()); const VNInfo *used = nullptr, *unused = nullptr; // Determine connections. - for (const VNInfo *VNI : LI->valnos) { + for (const VNInfo *VNI : LR.valnos) { // Group all unused values into one class. if (VNI->isUnused()) { if (unused) @@ -1351,14 +1351,14 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Connect to values live out of predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) - if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI))) + if (const VNInfo *PVNI = LR.getVNInfoBefore(LIS.getMBBEndIdx(*PI))) EqClass.join(VNI->id, PVNI->id); } else { // Normal value defined by an instruction. Check for two-addr redef. // FIXME: This could be coincidental. Should we really check for a tied // operand constraint? // Note that VNI->def may be a use slot for an early clobber def. - if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def)) + if (const VNInfo *UVNI = LR.getVNInfoBefore(VNI->def)) EqClass.join(VNI->id, UVNI->id); } } diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index 9451d92..a506e05 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1446,7 +1446,7 @@ void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) { void LiveIntervals::splitSeparateComponents(LiveInterval &LI, SmallVectorImpl<LiveInterval*> &SplitLIs) { ConnectedVNInfoEqClasses ConEQ(*this); - unsigned NumComp = ConEQ.Classify(&LI); + unsigned NumComp = ConEQ.Classify(LI); if (NumComp <= 1) return; DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n'); diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index 76099f2..85d544d 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1182,7 +1182,7 @@ MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) { /// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed /// as of just before "MI". -/// +/// /// Search is localised to a neighborhood of /// Neighborhood instructions before (searching for defs or kills) and N /// instructions after (searching just for defs) MI. 
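Classify works by joining value numbers into equivalence classes and counting the classes that remain. A minimal union-find with the same leader convention (the smallest member names the class), standing in for the llvm::IntEqClasses object behind EqClass; a sketch, not the LLVM implementation:

    #include <numeric>
    #include <utility>
    #include <vector>

    struct EqClasses {
      std::vector<unsigned> Parent;
      explicit EqClasses(unsigned N) : Parent(N) {
        std::iota(Parent.begin(), Parent.end(), 0u); // each node starts alone
      }
      unsigned findLeader(unsigned A) {
        while (Parent[A] != A)
          A = Parent[A] = Parent[Parent[A]]; // path halving
        return A;
      }
      unsigned join(unsigned A, unsigned B) {
        A = findLeader(A);
        B = findLeader(B);
        if (B < A)
          std::swap(A, B); // keep the smallest index as the leader
        return Parent[B] = A;
      }
    };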
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp index 790f5ac..4f424ff 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -31,7 +31,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { const std::string Banner; MachineFunctionPrinterPass() : MachineFunctionPass(ID), OS(dbgs()) { } - MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) + MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) : MachineFunctionPass(ID), OS(os), Banner(banner) {} const char *getPassName() const override { return "MachineFunction Printer"; } @@ -42,6 +42,8 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { } bool runOnMachineFunction(MachineFunction &MF) override { + if (!llvm::isFunctionInPrintList(MF.getName())) + return false; OS << "# " << Banner << ":\n"; MF.print(OS, getAnalysisIfAvailable<SlotIndexes>()); return false; diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 6b8eecc..6dca74d 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -866,14 +866,44 @@ void MachineInstr::addMemOperand(MachineFunction &MF, setMemRefs(NewMemRefs, NewMemRefs + NewNum); } +/// Check to see if the MMOs pointed to by the two MemRefs arrays are +/// identical. +static bool hasIdenticalMMOs(const MachineInstr &MI1, const MachineInstr &MI2) { + auto I1 = MI1.memoperands_begin(), E1 = MI1.memoperands_end(); + auto I2 = MI2.memoperands_begin(), E2 = MI2.memoperands_end(); + if ((E1 - I1) != (E2 - I2)) + return false; + for (; I1 != E1; ++I1, ++I2) { + if (**I1 != **I2) + return false; + } + return true; +} + std::pair<MachineInstr::mmo_iterator, unsigned> MachineInstr::mergeMemRefsWith(const MachineInstr& Other) { - // TODO: If we end up with too many memory operands, return the empty - // conservative set rather than failing asserts. + + // If either of the incoming memrefs are empty, we must be conservative and + // treat this as if we've exhausted our space for memrefs and dropped them. + if (memoperands_empty() || Other.memoperands_empty()) + return std::make_pair(nullptr, 0); + + // If both instructions have identical memrefs, we don't need to merge them. + // Since many instructions have a single memref, and we tend to merge things + // like pairs of loads from the same location, this catches a large number of + // cases in practice. + if (hasIdenticalMMOs(*this, Other)) + return std::make_pair(MemRefs, NumMemRefs); + // TODO: consider uniquing elements within the operand lists to reduce // space usage and fall back to conservative information less often. - size_t CombinedNumMemRefs = (memoperands_end() - memoperands_begin()) - + (Other.memoperands_end() - Other.memoperands_begin()); + size_t CombinedNumMemRefs = NumMemRefs + Other.NumMemRefs; + + // If we don't have enough room to store this many memrefs, be conservative + // and drop them. Otherwise, we'd fail asserts when trying to add them to + // the new instruction. 
+ if (CombinedNumMemRefs != uint8_t(CombinedNumMemRefs)) + return std::make_pair(nullptr, 0); MachineFunction *MF = getParent()->getParent(); mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs); diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index a8368e9..99a97d2 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -334,12 +334,11 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { // writes to all slots. if (MI->memoperands_empty()) return true; - for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), - oe = MI->memoperands_end(); o != oe; ++o) { - if (!(*o)->isStore() || !(*o)->getPseudoValue()) + for (const MachineMemOperand *MemOp : MI->memoperands()) { + if (!MemOp->isStore() || !MemOp->getPseudoValue()) continue; if (const FixedStackPseudoSourceValue *Value = - dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) { + dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) { if (Value->getFrameIndex() == FI) return true; } @@ -357,8 +356,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, bool RuledOut = false; bool HasNonInvariantUse = false; unsigned Def = 0; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (MO.isFI()) { // Remember if the instruction stores to the frame index. int FI = MO.getIndex(); @@ -452,9 +450,7 @@ void MachineLICM::HoistRegionPostRA() { // Walk the entire region, count number of defs for each register, and // collect potential LICM candidates. const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); - for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { - MachineBasicBlock *BB = Blocks[i]; - + for (MachineBasicBlock *BB : Blocks) { // If the header of the loop containing this basic block is a landing pad, // then don't try to hoist instructions out of this loop. const MachineLoop *ML = MLI->getLoopFor(BB); @@ -469,19 +465,15 @@ void MachineLICM::HoistRegionPostRA() { } SpeculationState = SpeculateUnknown; - for (MachineBasicBlock::iterator - MII = BB->begin(), E = BB->end(); MII != E; ++MII) { - MachineInstr *MI = &*MII; - ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); - } + for (MachineInstr &MI : *BB) + ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); } // Gather the registers read / clobbered by the terminator. BitVector TermRegs(NumRegs); MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); if (TI != Preheader->end()) { - for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = TI->getOperand(i); + for (const MachineOperand &MO : TI->operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -500,17 +492,16 @@ void MachineLICM::HoistRegionPostRA() { // 3. Make sure candidate def should not clobber // registers read by the terminator. Similarly its def should not be // clobbered by the terminator. 
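mergeMemRefsWith falls back to the conservative empty list for two reasons: an empty list on either side already means "unknown memory", and the combined count must survive a round-trip through the uint8_t field that stores it. The overflow test in isolation:

    #include <cstdint>

    // True iff the merged count still fits the 8-bit storage field; when it
    // does not, the caller drops to the conservative empty list instead of
    // tripping asserts later.
    static bool fitsInMemRefCount(unsigned NumA, unsigned NumB) {
      unsigned Combined = NumA + NumB;
      return Combined == uint8_t(Combined);
    }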
- for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { - if (Candidates[i].FI != INT_MIN && - StoredFIs.count(Candidates[i].FI)) + for (CandidateInfo &Candidate : Candidates) { + if (Candidate.FI != INT_MIN && + StoredFIs.count(Candidate.FI)) continue; - unsigned Def = Candidates[i].Def; + unsigned Def = Candidate.Def; if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { bool Safe = true; - MachineInstr *MI = Candidates[i].MI; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - const MachineOperand &MO = MI->getOperand(j); + MachineInstr *MI = Candidate.MI; + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || MO.isDef() || !MO.getReg()) continue; unsigned Reg = MO.getReg(); @@ -523,7 +514,7 @@ void MachineLICM::HoistRegionPostRA() { } } if (Safe) - HoistPostRA(MI, Candidates[i].Def); + HoistPostRA(MI, Candidate.Def); } } } @@ -532,15 +523,11 @@ void MachineLICM::HoistRegionPostRA() { /// sure it is not killed by any instructions in the loop. void MachineLICM::AddToLiveIns(unsigned Reg) { const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); - for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { - MachineBasicBlock *BB = Blocks[i]; + for (MachineBasicBlock *BB : Blocks) { if (!BB->isLiveIn(Reg)) BB->addLiveIn(Reg); - for (MachineBasicBlock::iterator - MII = BB->begin(), E = BB->end(); MII != E; ++MII) { - MachineInstr *MI = &*MII; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineInstr &MI : *BB) { + for (MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) MO.setIsKill(false); @@ -582,8 +569,8 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { // Check loop exiting blocks. SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); - for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i) - if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) { + for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks) + if (!DT->dominates(BB, CurrentLoopExitingBlock)) { SpeculationState = SpeculateTrue; return false; } @@ -689,8 +676,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { InitRegPressure(Preheader); // Now perform LICM. - for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { - MachineDomTreeNode *Node = Scopes[i]; + for (MachineDomTreeNode *Node : Scopes) { MachineBasicBlock *MBB = Node->getBlock(); EnterScope(MBB); @@ -858,13 +844,11 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { if (MI.memoperands_empty()) return true; - for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), - E = MI.memoperands_end(); I != E; ++I) { - if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) { + for (MachineMemOperand *MemOp : MI.memoperands()) + if (const PseudoSourceValue *PSV = MemOp->getPseudoValue()) if (PSV->isGOT() || PSV->isConstantPool()) return true; - } - } + return false; } @@ -899,9 +883,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { return false; // The instruction is loop invariant if all of its operands are. 
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = I.getOperand(i); - + for (const MachineOperand &MO : I.operands()) { if (!MO.isReg()) continue; @@ -1230,11 +1212,8 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { /// preheader that may become duplicates of instructions that are hoisted /// out of the loop. void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { - for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { - const MachineInstr *MI = &*I; - unsigned Opcode = MI->getOpcode(); - CSEMap[Opcode].push_back(MI); - } + for (MachineInstr &MI : *BB) + CSEMap[MI.getOpcode()].push_back(&MI); } /// Find an instruction amount PrevMIs that is a duplicate of MI. @@ -1242,11 +1221,10 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { const MachineInstr* MachineLICM::LookForDuplicate(const MachineInstr *MI, std::vector<const MachineInstr*> &PrevMIs) { - for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { - const MachineInstr *PrevMI = PrevMIs[i]; + for (const MachineInstr *PrevMI : PrevMIs) if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr))) return PrevMI; - } + return nullptr; } @@ -1296,8 +1274,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, } } - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - unsigned Idx = Defs[i]; + for (unsigned Idx : Defs) { unsigned Reg = MI->getOperand(Idx).getReg(); unsigned DupReg = Dup->getOperand(Idx).getReg(); MRI->replaceRegWith(Reg, DupReg); @@ -1370,11 +1347,9 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // Clear the kill flags of any register this instruction defines, // since they may need to be live throughout the entire loop // rather than just live for part of it. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineOperand &MO : MI->operands()) if (MO.isReg() && MO.isDef() && !MO.isDead()) MRI->clearKillFlags(MO.getReg()); - } // Add to the CSE map. if (CI != CSEMap.end()) diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index cdcd8eb..428295e 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1736,7 +1736,7 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { // Check the LI only has one connected component. 
ConnectedVNInfoEqClasses ConEQ(*LiveInts); - unsigned NumComp = ConEQ.Classify(&LI); + unsigned NumComp = ConEQ.Classify(LI); if (NumComp > 1) { report("Multiple connected components in live interval", MF); report_context(LI); diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index e7b3217..c1ff13e 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -2874,7 +2874,7 @@ void RegisterCoalescer::joinAllIntervals() { std::vector<MBBPriorityInfo> MBBs; MBBs.reserve(MF->size()); - for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){ + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB), JoinSplitEdges && isSplitEdge(MBB))); diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp index 3749b1d..f33dc3e 100644 --- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -313,21 +313,6 @@ static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) { namespace { -/// List of register defined and used by a machine instruction. -class RegisterOperands { -public: - SmallVector<unsigned, 8> Uses; - SmallVector<unsigned, 8> Defs; - SmallVector<unsigned, 8> DeadDefs; - - void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, - const MachineRegisterInfo &MRI, bool IgnoreDead = false); - - /// Use liveness information to find dead defs not marked with a dead flag - /// and move them to the DeadDefs vector. - void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS); -}; - /// Collect this instruction's unique uses and defs into SmallVectors for /// processing defs and uses in order. /// @@ -385,9 +370,11 @@ class RegisterOperandsCollector { } } - friend class RegisterOperands; + friend class llvm::RegisterOperands; }; +} // namespace + void RegisterOperands::collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, @@ -417,8 +404,6 @@ void RegisterOperands::detectDeadDefs(const MachineInstr &MI, } } -} // namespace - /// Initialize an array of N PressureDiffs. void PressureDiffs::init(unsigned N) { Size = N; @@ -431,6 +416,18 @@ void PressureDiffs::init(unsigned N) { PDiffArray = reinterpret_cast<PressureDiff*>(calloc(N, sizeof(PressureDiff))); } +void PressureDiffs::addInstruction(unsigned Idx, + const RegisterOperands &RegOpers, + const MachineRegisterInfo &MRI) { + PressureDiff &PDiff = (*this)[Idx]; + assert(!PDiff.begin()->isValid() && "stale PDiff"); + for (unsigned Reg : RegOpers.Defs) + PDiff.addPressureChange(Reg, true, &MRI); + + for (unsigned Reg : RegOpers.Uses) + PDiff.addPressureChange(Reg, false, &MRI); +} + /// Add a change in pressure to the pressure diff of a given instruction. void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, const MachineRegisterInfo *MRI) { @@ -467,18 +464,6 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, } } -/// Record the pressure difference induced by the given operand list. 
-static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers, - const MachineRegisterInfo *MRI) { - assert(!PDiff.begin()->isValid() && "stale PDiff"); - - for (unsigned Reg : RegOpers.Defs) - PDiff.addPressureChange(Reg, true, MRI); - - for (unsigned Reg : RegOpers.Uses) - PDiff.addPressureChange(Reg, false, MRI); -} - /// Force liveness of registers. void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) { for (unsigned Reg : Regs) { @@ -514,39 +499,10 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) { /// registers that are both defined and used by the instruction. If a pressure /// difference pointer is provided record the changes is pressure caused by this /// instruction independent of liveness. -void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, - PressureDiff *PDiff) { - assert(CurrPos != MBB->begin()); - if (!isBottomClosed()) - closeBottom(); - - // Open the top of the region using block iterators. - if (!RequireIntervals && isTopClosed()) - static_cast<RegionPressure&>(P).openTop(CurrPos); - - // Find the previous instruction. - do - --CurrPos; - while (CurrPos != MBB->begin() && CurrPos->isDebugValue()); +void RegPressureTracker::recede(const RegisterOperands &RegOpers, + SmallVectorImpl<unsigned> *LiveUses) { assert(!CurrPos->isDebugValue()); - SlotIndex SlotIdx; - if (RequireIntervals) - SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); - - // Open the top of the region using slot indexes. - if (RequireIntervals && isTopClosed()) - static_cast<IntervalPressure&>(P).openTop(SlotIdx); - - const MachineInstr &MI = *CurrPos; - RegisterOperands RegOpers; - RegOpers.collect(MI, *TRI, *MRI); - if (RequireIntervals) - RegOpers.detectDeadDefs(MI, *LIS); - - if (PDiff) - collectPDiff(*PDiff, RegOpers, MRI); - // Boost pressure for all dead defs together. increaseRegPressure(RegOpers.DeadDefs); decreaseRegPressure(RegOpers.DeadDefs); @@ -560,6 +516,10 @@ void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, discoverLiveOut(Reg); } + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + // Generate liveness for uses. for (unsigned Reg : RegOpers.Uses) { if (!LiveRegs.contains(Reg)) { @@ -586,6 +546,41 @@ void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, } } +void RegPressureTracker::recedeSkipDebugValues() { + assert(CurrPos != MBB->begin()); + if (!isBottomClosed()) + closeBottom(); + + // Open the top of the region using block iterators. + if (!RequireIntervals && isTopClosed()) + static_cast<RegionPressure&>(P).openTop(CurrPos); + + // Find the previous instruction. + do + --CurrPos; + while (CurrPos != MBB->begin() && CurrPos->isDebugValue()); + + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + + // Open the top of the region using slot indexes. + if (RequireIntervals && isTopClosed()) + static_cast<IntervalPressure&>(P).openTop(SlotIdx); +} + +void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses) { + recedeSkipDebugValues(); + + const MachineInstr &MI = *CurrPos; + RegisterOperands RegOpers; + RegOpers.collect(MI, *TRI, *MRI); + if (RequireIntervals) + RegOpers.detectDeadDefs(MI, *LIS); + + recede(RegOpers, LiveUses); +} + /// Advance across the current instruction. 
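Splitting recede() into recedeSkipDebugValues() plus recede(RegOpers) turns one opaque step into a move phase and an apply phase, so a caller can collect and reuse the instruction's operands in between; the scheduler change below does exactly that. The shape of the protocol, reduced to a toy tracker (an analogy, not the real class):

    #include <cstddef>
    #include <vector>

    struct MiniTracker {
      const std::vector<int> *Effects = nullptr; // per-instruction deltas
      std::size_t Pos = 0; // one past the current instruction, like CurrPos
      int Pressure = 0;

      void recedeSkipDebugValues() { --Pos; }        // phase 1: move cursor
      int currentEffect() const { return (*Effects)[Pos]; } // caller peeks
      void recede(int Delta) { Pressure += Delta; }  // phase 2: apply effects
    };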
void RegPressureTracker::advance() { assert(!TrackUntiedDefs && "unsupported mode"); diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index fb82ab7..11b246a 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -896,11 +896,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, assert(SU && "No SUnit mapped to this MI"); if (RPTracker) { - PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr; - RPTracker->recede(/*LiveUses=*/nullptr, PDiff); - assert(RPTracker->getPos() == std::prev(MII) && - "RPTracker can't find MI"); collectVRegUses(SU); + + RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, MRI); + if (PDiffs != nullptr) + PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI); + + RPTracker->recedeSkipDebugValues(); + assert(&*RPTracker->getPos() == MI && "RPTracker in sync"); + RPTracker->recede(RegOpers); } assert( @@ -1005,6 +1010,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes, TrueMemOrderLatency); } + // This call must come after calls to addChainDependency() since it + // consumes the 'RejectMemNodes' list that addChainDependency() possibly + // adds to. adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); PendingLoads.clear(); @@ -1086,6 +1094,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes); } + // This call must come after calls to addChainDependency() since it + // consumes the 'RejectMemNodes' list that addChainDependency() possibly + // adds to. adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); } else if (MI->mayLoad()) { @@ -1133,13 +1144,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, else NonAliasMemUses[V].push_back(SU); } - if (MayAlias) - adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, - RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes); + if (MayAlias) + // This call must come after calls to addChainDependency() since it + // consumes the 'RejectMemNodes' list that addChainDependency() + // possibly adds to. 
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, + RejectMemNodes, /*Latency=*/0); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bc2405b9..c741982 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7325,6 +7325,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitcast (fneg x)) -> // flipbit = signbit // (xor (bitcast x) (build_pair flipbit, flipbit)) + // // fold (bitcast (fabs x)) -> // flipbit = (and (extract_element (bitcast x), 0), signbit) // (xor (bitcast x) (build_pair flipbit, flipbit)) @@ -8794,20 +8795,21 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { ZeroCmp, Zero, RV); } +/// copysign(x, fp_extend(y)) -> copysign(x, y) +/// copysign(x, fp_round(y)) -> copysign(x, y) static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { - // copysign(x, fp_extend(y)) -> copysign(x, y) - // copysign(x, fp_round(y)) -> copysign(x, y) - // Do not optimize out type conversion of f128 type yet. - // For some target like x86_64, configuration is changed - // to keep one f128 value in one SSE register, but - // instruction selection cannot handle FCOPYSIGN on - // SSE registers yet. SDValue N1 = N->getOperand(1); - EVT N1VT = N1->getValueType(0); - EVT N1Op0VT = N1->getOperand(0)->getValueType(0); - return (N1.getOpcode() == ISD::FP_EXTEND || - N1.getOpcode() == ISD::FP_ROUND) && - (N1VT == N1Op0VT || N1Op0VT != MVT::f128); + if ((N1.getOpcode() == ISD::FP_EXTEND || + N1.getOpcode() == ISD::FP_ROUND)) { + // Do not optimize out type conversion of f128 type yet. + // For some targets like x86_64, configuration is changed to keep one f128 + // value in one SSE register, but instruction selection cannot handle + // FCOPYSIGN on SSE registers yet. + EVT N1VT = N1->getValueType(0); + EVT N1Op0VT = N1->getOperand(0)->getValueType(0); + return (N1VT == N1Op0VT || N1Op0VT != MVT::f128); + } + return false; } SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b62bd2b..08815ed 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -297,8 +297,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, else if (Personality == EHPersonality::CoreCLR) calculateClrEHStateNumbers(&fn, EHInfo); - calculateCatchReturnSuccessorColors(&fn, EHInfo); - // Map all BB references in the WinEH data to MBBs. for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { for (WinEHHandlerType &H : TBME.HandlerArray) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f46767f..5d572c4 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2941,6 +2941,18 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // This trivially expands to CTLZ. 
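The restructured CanCombineFCOPYSIGN_EXTEND_ROUND expresses the same fold: copysign reads only the sign of its second operand, and rounding or extending never flips a sign, so the conversion can be bypassed (modulo the f128 lowering caveat in the comment). A quick standalone check of that identity:

    #include <cassert>
    #include <cmath>

    int main() {
      const double X = 3.0;
      const double Y = -1.2345678901234567;
      // fp_round(y): narrowing changes the magnitude but never the sign,
      // so copysign cannot observe the difference.
      const float YRounded = static_cast<float>(Y);
      assert(std::copysign(X, Y) == std::copysign(X, double(YRounded)));
      return 0;
    }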
return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op); case ISD::CTLZ: { + EVT VT = Op.getValueType(); + unsigned len = VT.getSizeInBits(); + + if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { + EVT SetCCVT = getSetCCResultType(VT); + SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); + return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + DAG.getConstant(len, dl, VT), CTLZ); + } + // for now, we do this: // x = x | (x >> 1); // x = x | (x >> 2); @@ -2950,9 +2962,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // return popcount(~x); // // Ref: "Hacker's Delight" by Henry Warren - EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT); Op = DAG.getNode(ISD::OR, dl, VT, Op, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index cd114d6..74f80db 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -262,12 +262,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); case TargetLowering::TypePromoteFloat: { // Convert the promoted float by hand. - if (NOutVT.bitsEq(NInVT)) { - SDValue PromotedOp = GetPromotedFloat(InOp); - SDValue Trunc = DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp); - return DAG.getNode(ISD::AssertZext, dl, NOutVT, Trunc, - DAG.getValueType(OutVT)); - } + SDValue PromotedOp = GetPromotedFloat(InOp); + return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp); break; } case TargetLowering::TypeExpandInteger: diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e446a93..45ae39a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1205,8 +1205,13 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { // Figure out the funclet membership for the catchret's successor. // This will be used by the FuncletLayout pass to determine how to order the // BB's. - WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); - const BasicBlock *SuccessorColor = EHInfo->CatchRetSuccessorColorMap[&I]; + // A 'catchret' returns to the outer scope's color. 
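The new CTLZ expansion above prefers a target's zero-undef count-leading-zeros plus an explicit select over the shift/or/popcount sequence. The same construction in C++, where GCC/Clang's __builtin_clz (undefined for 0) plays the role of CTLZ_ZERO_UNDEF and the ternary is the select on zero:

    #include <cassert>
    #include <cstdint>

    // Fully defined ctlz built from a zero-undef primitive plus a select.
    static unsigned ctlz32(uint32_t X) {
      return X == 0 ? 32u : static_cast<unsigned>(__builtin_clz(X));
    }

    int main() {
      assert(ctlz32(0) == 32);
      assert(ctlz32(1) == 31);
      assert(ctlz32(0x80000000u) == 0);
      return 0;
    }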
+ Value *ParentPad = I.getParentPad(); + const BasicBlock *SuccessorColor; + if (isa<ConstantTokenNone>(ParentPad)) + SuccessorColor = &FuncInfo.Fn->getEntryBlock(); + else + SuccessorColor = cast<Instruction>(ParentPad)->getParent(); assert(SuccessorColor && "No parent funclet for catchret!"); MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor]; assert(SuccessorColorMBB && "No MBB for SuccessorColor!"); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 6547a62..02545a7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -461,7 +461,9 @@ static void lowerIncomingStatepointValue(SDValue Incoming, // If the original value was a constant, make sure it gets recorded as // such in the stackmap. This is required so that the consumer can // parse any internal format to the deopt state. It also handles null - // pointers and other constant pointers in GC states + // pointers and other constant pointers in GC states. Note the constant + // vectors do not appear to actually hit this path and that anything larger + // than an i64 value (not type!) will fail asserts here. pushStackMapConstant(Ops, Builder, C->getSExtValue()); } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { // This handles allocas as arguments to the statepoint (this is only @@ -505,27 +507,27 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, #ifndef NDEBUG // Check that each of the gc pointer and bases we've gotten out of the - // safepoint is something the strategy thinks might be a pointer into the GC - // heap. This is basically just here to help catch errors during statepoint - // insertion. TODO: This should actually be in the Verifier, but we can't get - // to the GCStrategy from there (yet). + // safepoint is something the strategy thinks might be a pointer (or vector + // of pointers) into the GC heap. This is basically just here to help catch + // errors during statepoint insertion. TODO: This should actually be in the + // Verifier, but we can't get to the GCStrategy from there (yet). GCStrategy &S = Builder.GFI->getStrategy(); for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V->getType()); + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed base pointer found in statepoint"); } } for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V->getType()); + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed derived pointer found in statepoint"); } } for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V->getType()); + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed pointer relocated"); } diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp index f8aa1e2..d361a6c 100644 --- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -47,6 +47,7 @@ // MachineFrameInfo is updated with this information. //===----------------------------------------------------------------------===// #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" // To check for profitability. 
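The ShrinkWrap hunks that follow add a reverse-post-order irreducibility test: during an RPO walk, any edge into an already-visited block must be a backedge to a loop header known to MachineLoopInfo, and an edge that is not marks the CFG irreducible. A self-contained sketch of that walk; the backedge oracle is a parameter here, where LLVM asks the loop info:

    #include <functional>
    #include <vector>

    bool isIrreducible(
        const std::vector<std::vector<unsigned>> &Succs,
        const std::vector<unsigned> &RPO, // blocks in reverse post-order
        const std::function<bool(unsigned Src, unsigned Hdr)> &IsBackedge) {
      std::vector<bool> Visited(Succs.size(), false);
      for (unsigned BB : RPO) {
        Visited[BB] = true;
        for (unsigned Succ : Succs[BB]) {
          // An edge to a visited block is retreating; in a reducible CFG
          // it must target the header of a loop containing the source.
          if (Visited[Succ] && !IsBackedge(BB, Succ))
            return true;
        }
      }
      return false;
    }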
@@ -263,6 +264,8 @@ MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, if (!IDom) break; } + if (IDom == &Block) + return nullptr; return IDom; } @@ -352,13 +355,9 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) { // Push Save outside of this loop if immediate dominator is different // from save block. If immediate dominator is not different, bail out. - MachineBasicBlock *IDom = FindIDom<>(*Save, Save->predecessors(), *MDT); - if (IDom != Save) - Save = IDom; - else { - Save = nullptr; + Save = FindIDom<>(*Save, Save->predecessors(), *MDT); + if (!Save) break; - } } else { // If the loop does not exit, there is no point in looking // for a post-dominator outside the loop. @@ -386,6 +385,41 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, } } +/// Check whether the edge (\p SrcBB, \p DestBB) is a backedge according to MLI. +/// I.e., check if there exists a loop that contains SrcBB and where DestBB is +/// the loop header. +static bool isProperBackedge(const MachineLoopInfo &MLI, + const MachineBasicBlock *SrcBB, + const MachineBasicBlock *DestBB) { + for (const MachineLoop *Loop = MLI.getLoopFor(SrcBB); Loop; + Loop = Loop->getParentLoop()) { + if (Loop->getHeader() == DestBB) + return true; + } + return false; +} + +/// Check if the CFG of \p MF is irreducible. +static bool isIrreducibleCFG(const MachineFunction &MF, + const MachineLoopInfo &MLI) { + const MachineBasicBlock *Entry = &*MF.begin(); + ReversePostOrderTraversal<const MachineBasicBlock *> RPOT(Entry); + BitVector VisitedBB(MF.getNumBlockIDs()); + for (const MachineBasicBlock *MBB : RPOT) { + VisitedBB.set(MBB->getNumber()); + for (const MachineBasicBlock *SuccBB : MBB->successors()) { + if (!VisitedBB.test(SuccBB->getNumber())) + continue; + // We already visited SuccBB, thus MBB->SuccBB must be a backedge. + // Check that the header matches what we have in the loop information. + // Otherwise, we have an irreducible graph. + if (!isProperBackedge(MLI, MBB, SuccBB)) + return true; + } + } + return false; +} + bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { if (MF.empty() || !isShrinkWrapEnabled(MF)) return false; @@ -394,6 +428,17 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { init(MF); + if (isIrreducibleCFG(MF, *MLI)) { + // If MF is irreducible, a block may be in a loop without + // MachineLoopInfo reporting it. I.e., we may use the + // post-dominance property in loops, which leads to incorrect + // results. Moreover, we may miss that the prologue and + // epilogue are not in the same loop, leading to unbalanced + // construction/destruction of the stack frame. + DEBUG(dbgs() << "Irreducible CFGs are not supported yet\n"); + return false; + } + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); std::unique_ptr<RegScavenger> RS( TRI->requiresRegisterScavenging(MF) ?
new RegScavenger() : nullptr); diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp index 3541b33..7b52038 100644 --- a/contrib/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp @@ -43,6 +43,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -570,6 +571,14 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { } } + // Update the location of C++ catch objects for the MSVC personality routine. + if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo()) + for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap) + for (WinEHHandlerType &H : TBME.HandlerArray) + if (H.CatchObj.FrameIndex != INT_MAX && + SlotRemap.count(H.CatchObj.FrameIndex)) + H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex]; + DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n"); DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n"); DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n"); diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp index 2426c27..886c5f6 100644 --- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -144,10 +144,11 @@ static void addTryBlockMapEntry(WinEHFuncInfo &FuncInfo, int TryLow, HT.TypeDescriptor = cast<GlobalVariable>(TypeInfo->stripPointerCasts()); HT.Adjectives = cast<ConstantInt>(CPI->getArgOperand(1))->getZExtValue(); HT.Handler = CPI->getParent(); - if (isa<ConstantPointerNull>(CPI->getArgOperand(2))) - HT.CatchObj.Alloca = nullptr; + if (auto *AI = + dyn_cast<AllocaInst>(CPI->getArgOperand(2)->stripPointerCasts())) + HT.CatchObj.Alloca = AI; else - HT.CatchObj.Alloca = cast<AllocaInst>(CPI->getArgOperand(2)); + HT.CatchObj.Alloca = nullptr; TBME.HandlerArray.push_back(HT); } FuncInfo.TryBlockMap.push_back(TBME); @@ -664,24 +665,6 @@ void WinEHPrepare::colorFunclets(Function &F) { } } -void llvm::calculateCatchReturnSuccessorColors(const Function *Fn, - WinEHFuncInfo &FuncInfo) { - for (const BasicBlock &BB : *Fn) { - const auto *CatchRet = dyn_cast<CatchReturnInst>(BB.getTerminator()); - if (!CatchRet) - continue; - // A 'catchret' returns to the outer scope's color. - Value *ParentPad = CatchRet->getParentPad(); - const BasicBlock *Color; - if (isa<ConstantTokenNone>(ParentPad)) - Color = &Fn->getEntryBlock(); - else - Color = cast<Instruction>(ParentPad)->getParent(); - // Record the catchret successor's funclet membership. - FuncInfo.CatchRetSuccessorColorMap[CatchRet] = Color; - } -} - void WinEHPrepare::demotePHIsOnFunclets(Function &F) { // Strip PHI nodes off of EH pads. 
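The isIrreducibleCFG/isProperBackedge pair in the ShrinkWrap hunk above leans on a property of reverse post-order: the only edges into an already-visited block are retreating edges, and in a reducible CFG every retreating edge must target a loop header the loop analysis knows about. A standalone sketch of the same test on a toy adjacency list; every name here is ours, not LLVM's:

    #include <algorithm>
    #include <functional>
    #include <vector>

    // Succ[N] lists the successors of node N; Entry is the CFG entry node.
    // isProperBackedge(Src, Dest) stands in for the MachineLoopInfo query: it
    // must return true iff Dest is the header of a loop containing Src.
    template <typename BackedgeFn>
    bool isIrreducible(const std::vector<std::vector<int>> &Succ, int Entry,
                       BackedgeFn isProperBackedge) {
      const int N = static_cast<int>(Succ.size());
      // Reverse post-order = the reverse of a DFS post-order from the entry.
      std::vector<int> Order;
      std::vector<char> Seen(N, 0);
      std::function<void(int)> PostOrder = [&](int U) {
        Seen[U] = 1;
        for (int V : Succ[U])
          if (!Seen[V])
            PostOrder(V);
        Order.push_back(U);
      };
      PostOrder(Entry);
      std::reverse(Order.begin(), Order.end());

      std::vector<char> Visited(N, 0);
      for (int U : Order) {
        Visited[U] = 1;
        for (int V : Succ[U])
          // An already-visited successor means U->V retreats; if the loop
          // analysis does not bless it as a backedge, the CFG is irreducible.
          if (Visited[V] && !isProperBackedge(U, V))
            return true;
      }
      return false;
    }

On the classic two-entry loop (edges 0->1, 0->2, 1->2, 2->1) this returns true for any analysis that recognizes only natural loops, which is exactly the case ShrinkWrap now bails out on.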
SmallVector<PHINode *, 16> PHINodes; diff --git a/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp index c6bfbc0..a9dee7a 100644 --- a/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -15,6 +15,7 @@ #include "llvm/DebugInfo/Symbolize/DIPrinter.h" #include "llvm/DebugInfo/DIContext.h" +#include "llvm/Support/LineIterator.h" namespace llvm { namespace symbolize { @@ -24,7 +25,37 @@ namespace symbolize { static const char kDILineInfoBadString[] = "<invalid>"; static const char kBadString[] = "??"; -void DIPrinter::printName(const DILineInfo &Info, bool Inlined) { +// Prints the source code in FileName around the given Line. +void DIPrinter::printContext(std::string FileName, int64_t Line) { + if (PrintSourceContext <= 0) + return; + + ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = + MemoryBuffer::getFile(FileName); + if (!BufOrErr) + return; + + std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get()); + int64_t FirstLine = + std::max(static_cast<int64_t>(1), Line - PrintSourceContext / 2); + int64_t LastLine = FirstLine + PrintSourceContext; + size_t MaxLineNumberWidth = std::ceil(std::log10(LastLine)); + + for (line_iterator I = line_iterator(*Buf, false); + !I.is_at_eof() && I.line_number() <= LastLine; ++I) { + int64_t L = I.line_number(); + if (L >= FirstLine && L <= LastLine) { + OS << format_decimal(L, MaxLineNumberWidth); + if (L == Line) + OS << " >: "; + else + OS << " : "; + OS << *I << "\n"; + } + } +} + +void DIPrinter::print(const DILineInfo &Info, bool Inlined) { if (PrintFunctionNames) { std::string FunctionName = Info.FunctionName; if (FunctionName == kDILineInfoBadString) @@ -38,21 +69,22 @@ void DIPrinter::printName(const DILineInfo &Info, bool Inlined) { if (Filename == kDILineInfoBadString) Filename = kBadString; OS << Filename << ":" << Info.Line << ":" << Info.Column << "\n"; + printContext(Filename, Info.Line); } DIPrinter &DIPrinter::operator<<(const DILineInfo &Info) { - printName(Info, false); + print(Info, false); return *this; } DIPrinter &DIPrinter::operator<<(const DIInliningInfo &Info) { uint32_t FramesNum = Info.getNumberOfFrames(); if (FramesNum == 0) { - printName(DILineInfo(), false); + print(DILineInfo(), false); return *this; } for (uint32_t i = 0; i < FramesNum; i++) - printName(Info.getFrame(i), i > 0); + print(Info.getFrame(i), i > 0); return *this; } diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp index b931f10..01e829f 100644 --- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp +++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp @@ -1,4 +1,4 @@ -//===------- OrcTargetSupport.cpp - Target support utilities for Orc ------===// +//===------ OrcArchSupport.cpp - Architecture specific support code -------===// // // The LLVM Compiler Infrastructure // @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Triple.h" -#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h" +#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h" #include "llvm/Support/Process.h" #include <array> diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp index e519c7f..956daae 100644 --- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp +++
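DIPrinter::printContext above reads the whole file through a MemoryBuffer and walks it with line_iterator; the same windowing logic can be sketched with plain iostreams. All names here are ours, and the window and marker format copy the hunk (the width is computed from the rendered last line number, which also sidesteps the log10 edge case at exact powers of ten):

    #include <algorithm>
    #include <fstream>
    #include <iomanip>
    #include <iostream>
    #include <string>

    // Print `Context` lines of source centered on `Line`, marking the target
    // line with '>', as DIPrinter::printContext does above.
    static void printSourceContext(const std::string &FileName, long Line,
                                   int Context) {
      if (Context <= 0)
        return;
      std::ifstream In(FileName);
      if (!In)
        return; // mirror the silent bail-out when the file cannot be read
      long First = std::max(1L, Line - Context / 2);
      long Last = First + Context;
      int Width = (int)std::to_string(Last).size(); // widest line number
      std::string Text;
      for (long L = 1; L <= Last && std::getline(In, Text); ++L) {
        if (L < First)
          continue;
        std::cout << std::setw(Width) << L << (L == Line ? " >: " : "  : ")
                  << Text << "\n";
      }
    }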
b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp @@ -9,7 +9,7 @@ #include "OrcCBindingsStack.h" -#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h" +#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DynamicLibrary.h" #include <cstdio> diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h index 2e17624..aae6a99 100644 --- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h +++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -221,7 +221,8 @@ public: ModuleHandleT addIRModuleLazy(Module* M, LLVMOrcSymbolResolverFn ExternalResolver, void *ExternalResolverCtx) { - return addIRModule(CODLayer, std::move(M), nullptr, + return addIRModule(CODLayer, std::move(M), + llvm::make_unique<SectionMemoryManager>(), std::move(ExternalResolver), ExternalResolverCtx); } diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp new file mode 100644 index 0000000..e95115e --- /dev/null +++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp @@ -0,0 +1,57 @@ +//===---------------- OrcError.cpp - Error codes for ORC ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Error codes for ORC. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/OrcError.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; +using namespace llvm::orc; + +namespace { + +class OrcErrorCategory : public std::error_category { +public: + const char *name() const LLVM_NOEXCEPT override { return "orc"; } + + std::string message(int condition) const override { + switch (static_cast<OrcErrorCode>(condition)) { + case OrcErrorCode::RemoteAllocatorDoesNotExist: + return "Remote allocator does not exist"; + case OrcErrorCode::RemoteAllocatorIdAlreadyInUse: + return "Remote allocator Id already in use"; + case OrcErrorCode::RemoteMProtectAddrUnrecognized: + return "Remote mprotect call references unallocated memory"; + case OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist: + return "Remote indirect stubs owner does not exist"; + case OrcErrorCode::RemoteIndirectStubsOwnerIdAlreadyInUse: + return "Remote indirect stubs owner Id already in use"; + case OrcErrorCode::UnexpectedRPCCall: + return "Unexpected RPC call"; + } + llvm_unreachable("Unhandled error code"); + } +}; + +static ManagedStatic<OrcErrorCategory> OrcErrCat; +} + +namespace llvm { +namespace orc { + +std::error_code orcError(OrcErrorCode ErrCode) { + typedef std::underlying_type<OrcErrorCode>::type UT; + return std::error_code(static_cast<UT>(ErrCode), *OrcErrCat); +} +} +} diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h index 38a27cf..2ab70a9 100644 --- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h +++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h @@ -54,10 +54,13 @@ class OrcMCJITReplacement : public ExecutionEngine { return Addr; } - void reserveAllocationSpace(uintptr_t CodeSize, uintptr_t DataSizeRO, - uintptr_t DataSizeRW) override { - return ClientMM->reserveAllocationSpace(CodeSize, 
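OrcError.cpp above follows the standard recipe for a custom std::error_code domain: a singleton std::error_category supplies the domain name and the human-readable messages, and a small helper wraps enum values into std::error_code. A self-contained sketch of the same recipe, with DemoErrorCode and demoError as our stand-ins (a function-local static plays the role of ManagedStatic):

    #include <string>
    #include <system_error>

    enum class DemoErrorCode { UnexpectedRPCCall = 1, AllocatorIdInUse };

    // One category object per error domain; name() and message() are the only
    // members that must be overridden.
    class DemoErrorCategory : public std::error_category {
    public:
      const char *name() const noexcept override { return "demo"; }
      std::string message(int Condition) const override {
        switch (static_cast<DemoErrorCode>(Condition)) {
        case DemoErrorCode::UnexpectedRPCCall: return "Unexpected RPC call";
        case DemoErrorCode::AllocatorIdInUse: return "Allocator Id already in use";
        }
        return "Unknown demo error";
      }
    };

    // Analogue of orcError(): wrap an enum value in the shared category.
    std::error_code demoError(DemoErrorCode Code) {
      static DemoErrorCategory Category; // ManagedStatic stand-in
      return std::error_code(static_cast<int>(Code), Category);
    }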
DataSizeRO, - DataSizeRW); + void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, + uintptr_t RODataSize, uint32_t RODataAlign, + uintptr_t RWDataSize, + uint32_t RWDataAlign) override { + return ClientMM->reserveAllocationSpace(CodeSize, CodeAlign, + RODataSize, RODataAlign, + RWDataSize, RWDataAlign); } bool needsToReserveAllocationSpace() override { @@ -74,6 +77,11 @@ class OrcMCJITReplacement : public ExecutionEngine { return ClientMM->deregisterEHFrames(Addr, LoadAddr, Size); } + void notifyObjectLoaded(RuntimeDyld &RTDyld, + const object::ObjectFile &O) override { + return ClientMM->notifyObjectLoaded(RTDyld, O); + } + void notifyObjectLoaded(ExecutionEngine *EE, const object::ObjectFile &O) override { return ClientMM->notifyObjectLoaded(EE, O); diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp new file mode 100644 index 0000000..064633b --- /dev/null +++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp @@ -0,0 +1,83 @@ +//===------- OrcRemoteTargetRPCAPI.cpp - ORC Remote API utilities ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" + +namespace llvm { +namespace orc { +namespace remote { + +const char *OrcRemoteTargetRPCAPI::getJITProcIdName(JITProcId Id) { + switch (Id) { + case InvalidId: + return "*** Invalid JITProcId ***"; + case CallIntVoidId: + return "CallIntVoid"; + case CallIntVoidResponseId: + return "CallIntVoidResponse"; + case CallMainId: + return "CallMain"; + case CallMainResponseId: + return "CallMainResponse"; + case CallVoidVoidId: + return "CallVoidVoid"; + case CallVoidVoidResponseId: + return "CallVoidVoidResponse"; + case CreateRemoteAllocatorId: + return "CreateRemoteAllocator"; + case CreateIndirectStubsOwnerId: + return "CreateIndirectStubsOwner"; + case DestroyRemoteAllocatorId: + return "DestroyRemoteAllocator"; + case DestroyIndirectStubsOwnerId: + return "DestroyIndirectStubsOwner"; + case EmitIndirectStubsId: + return "EmitIndirectStubs"; + case EmitIndirectStubsResponseId: + return "EmitIndirectStubsResponse"; + case EmitResolverBlockId: + return "EmitResolverBlock"; + case EmitTrampolineBlockId: + return "EmitTrampolineBlock"; + case EmitTrampolineBlockResponseId: + return "EmitTrampolineBlockResponse"; + case GetSymbolAddressId: + return "GetSymbolAddress"; + case GetSymbolAddressResponseId: + return "GetSymbolAddressResponse"; + case GetRemoteInfoId: + return "GetRemoteInfo"; + case GetRemoteInfoResponseId: + return "GetRemoteInfoResponse"; + case ReadMemId: + return "ReadMem"; + case ReadMemResponseId: + return "ReadMemResponse"; + case ReserveMemId: + return "ReserveMem"; + case ReserveMemResponseId: + return "ReserveMemResponse"; + case RequestCompileId: + return "RequestCompile"; + case RequestCompileResponseId: + return "RequestCompileResponse"; + case SetProtectionsId: + return "SetProtections"; + case TerminateSessionId: + return "TerminateSession"; + case WriteMemId: + return "WriteMem"; + case WritePtrId: + return "WritePtr"; + }; + return nullptr; +} +} +} +} diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index a95f3bb..d16b2db 100644 --- 
a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -146,9 +146,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { // Compute the memory size required to load all sections to be loaded // and pass this information to the memory manager if (MemMgr.needsToReserveAllocationSpace()) { - uint64_t CodeSize = 0, DataSizeRO = 0, DataSizeRW = 0; - computeTotalAllocSize(Obj, CodeSize, DataSizeRO, DataSizeRW); - MemMgr.reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW); + uint64_t CodeSize = 0, RODataSize = 0, RWDataSize = 0; + uint32_t CodeAlign = 1, RODataAlign = 1, RWDataAlign = 1; + computeTotalAllocSize(Obj, CodeSize, CodeAlign, RODataSize, RODataAlign, + RWDataSize, RWDataAlign); + MemMgr.reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, + RWDataSize, RWDataAlign); } // Used sections from the object file @@ -335,13 +338,15 @@ static bool isZeroInit(const SectionRef Section) { // sections void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj, uint64_t &CodeSize, - uint64_t &DataSizeRO, - uint64_t &DataSizeRW) { + uint32_t &CodeAlign, + uint64_t &RODataSize, + uint32_t &RODataAlign, + uint64_t &RWDataSize, + uint32_t &RWDataAlign) { // Compute the size of all sections required for execution std::vector<uint64_t> CodeSectionSizes; std::vector<uint64_t> ROSectionSizes; std::vector<uint64_t> RWSectionSizes; - uint64_t MaxAlignment = sizeof(void *); // Collect sizes of all sections to be loaded; // also determine the max alignment of all sections @@ -376,17 +381,15 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj, SectionSize = 1; if (IsCode) { + CodeAlign = std::max(CodeAlign, Alignment); CodeSectionSizes.push_back(SectionSize); } else if (IsReadOnly) { + RODataAlign = std::max(RODataAlign, Alignment); ROSectionSizes.push_back(SectionSize); } else { + RWDataAlign = std::max(RWDataAlign, Alignment); RWSectionSizes.push_back(SectionSize); } - - // update the max alignment - if (Alignment > MaxAlignment) { - MaxAlignment = Alignment; - } } } @@ -410,9 +413,9 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj, // allocated with the max alignment. Note that we cannot compute with the // individual alignments of the sections, because then the required size // depends on the order, in which the sections are allocated. 
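The comment above is the key to the new per-group alignment: padding every section in a group to that group's maximum alignment is an upper bound that cannot be undershot no matter which order the memory manager ultimately allocates the sections in. A sketch of the per-group computation, a plausible analogue of computeAllocationSizeForSections rather than the verbatim helper:

    #include <cstdint>
    #include <vector>

    // Upper bound for one group (code, RO data, or RW data): each section is
    // rounded up to the group's max alignment, so any allocation order fits.
    // The div-mul form is safe even for alignments that are not powers of two.
    static uint64_t groupAllocationSize(const std::vector<uint64_t> &SectionSizes,
                                        uint64_t MaxAlign) {
      uint64_t Total = 0;
      for (uint64_t Size : SectionSizes)
        Total += (Size + MaxAlign - 1) / MaxAlign * MaxAlign; // round up
      return Total;
    }

Splitting MaxAlignment into CodeAlign/RODataAlign/RWDataAlign, as the hunk does, only tightens this bound: each group now pads to its own maximum instead of the global one.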
- CodeSize = computeAllocationSizeForSections(CodeSectionSizes, MaxAlignment); - DataSizeRO = computeAllocationSizeForSections(ROSectionSizes, MaxAlignment); - DataSizeRW = computeAllocationSizeForSections(RWSectionSizes, MaxAlignment); + CodeSize = computeAllocationSizeForSections(CodeSectionSizes, CodeAlign); + RODataSize = computeAllocationSizeForSections(ROSectionSizes, RODataAlign); + RWDataSize = computeAllocationSizeForSections(RWSectionSizes, RWDataAlign); } // compute stub buffer size for the given section @@ -937,7 +940,9 @@ RuntimeDyld::loadObject(const ObjectFile &Obj) { if (!Dyld->isCompatibleFile(Obj)) report_fatal_error("Incompatible object format!"); - return Dyld->loadObject(Obj); + auto LoadedObjInfo = Dyld->loadObject(Obj); + MemMgr.notifyObjectLoaded(*this, Obj); + return LoadedObjInfo; } void *RuntimeDyld::getSymbolLocalAddress(StringRef Name) const { @@ -967,6 +972,17 @@ bool RuntimeDyld::hasError() { return Dyld->hasError(); } StringRef RuntimeDyld::getErrorString() { return Dyld->getErrorString(); } +void RuntimeDyld::finalizeWithMemoryManagerLocking() { + bool MemoryFinalizationLocked = MemMgr.FinalizationLocked; + MemMgr.FinalizationLocked = true; + resolveRelocations(); + registerEHFrames(); + if (!MemoryFinalizationLocked) { + MemMgr.finalizeMemory(); + MemMgr.FinalizationLocked = false; + } +} + void RuntimeDyld::registerEHFrames() { if (Dyld) Dyld->registerEHFrames(); diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index dafd3c8..ab732c6 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -411,8 +411,10 @@ protected: // \brief Compute an upper bound of the memory that is required to load all // sections - void computeTotalAllocSize(const ObjectFile &Obj, uint64_t &CodeSize, - uint64_t &DataSizeRO, uint64_t &DataSizeRW); + void computeTotalAllocSize(const ObjectFile &Obj, + uint64_t &CodeSize, uint32_t &CodeAlign, + uint64_t &RODataSize, uint32_t &RODataAlign, + uint64_t &RWDataSize, uint32_t &RWDataAlign); // \brief Compute the stub buffer size required for a section unsigned computeSectionStubBufSize(const ObjectFile &Obj, diff --git a/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp index e2f2208..1ad5f17 100644 --- a/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp +++ b/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp @@ -137,9 +137,6 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg) return true; } - // Don't allow free memory blocks to be used after setting protection flags. - RODataMem.FreeMem.clear(); - // Make read-only data memory read-only. 
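finalizeWithMemoryManagerLocking above exists so that a client driving several RuntimeDyld instances over one memory manager finalizes memory only once: the flag is latched before relocations are resolved, and only the caller that set it actually finalizes and releases it. A reduced model of the latch; the types and callbacks here are ours, mirroring the hunk:

    #include <functional>

    // Minimal stand-in for the memory-manager state used by the hunk above.
    struct MemMgr {
      bool FinalizationLocked = false;
      std::function<void()> finalizeMemory;
    };

    // Resolve and register first, then finalize only if nobody further up the
    // call chain already holds the lock; that first caller releases it.
    inline void finalizeWithLocking(MemMgr &MM, std::function<void()> Resolve,
                                    std::function<void()> RegisterEHFrames) {
      bool WasLocked = MM.FinalizationLocked;
      MM.FinalizationLocked = true;
      Resolve();
      RegisterEHFrames();
      if (!WasLocked) {
        MM.finalizeMemory();
        MM.FinalizationLocked = false;
      }
    }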
ec = applyMemoryGroupPermissions(RODataMem, sys::Memory::MF_READ | sys::Memory::MF_EXEC); diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp index 1ebe9b7..0ce44e1 100644 --- a/contrib/llvm/lib/IR/AsmWriter.cpp +++ b/contrib/llvm/lib/IR/AsmWriter.cpp @@ -3121,7 +3121,7 @@ void AssemblyWriter::printMetadataAttachments( return; if (MDNames.empty()) - TheModule->getMDKindNames(MDNames); + MDs[0].second->getContext().getMDKindNames(MDNames); for (const auto &I : MDs) { unsigned Kind = I.first; diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp index 7f39c80..591dafa 100644 --- a/contrib/llvm/lib/IR/Core.cpp +++ b/contrib/llvm/lib/IR/Core.cpp @@ -1722,7 +1722,7 @@ void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC) { const char *LLVMGetGC(LLVMValueRef Fn) { Function *F = unwrap<Function>(Fn); - return F->hasGC()? F->getGC() : nullptr; + return F->hasGC()? F->getGC().c_str() : nullptr; } void LLVMSetGC(LLVMValueRef Fn, const char *GC) { diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp index cfb40b1..cfdfc40 100644 --- a/contrib/llvm/lib/IR/Function.cpp +++ b/contrib/llvm/lib/IR/Function.cpp @@ -366,47 +366,21 @@ void Function::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) { setAttributes(PAL); } -// Maintain the GC name for each function in an on-the-side table. This saves -// allocating an additional word in Function for programs which do not use GC -// (i.e., most programs) at the cost of increased overhead for clients which do -// use GC. -static DenseMap<const Function*,PooledStringPtr> *GCNames; -static StringPool *GCNamePool; -static ManagedStatic<sys::SmartRWMutex<true> > GCLock; - -bool Function::hasGC() const { - sys::SmartScopedReader<true> Reader(*GCLock); - return GCNames && GCNames->count(this); -} - -const char *Function::getGC() const { +const std::string &Function::getGC() const { assert(hasGC() && "Function has no collector"); - sys::SmartScopedReader<true> Reader(*GCLock); - return *(*GCNames)[this]; + return getContext().getGC(*this); } -void Function::setGC(const char *Str) { - sys::SmartScopedWriter<true> Writer(*GCLock); - if (!GCNamePool) - GCNamePool = new StringPool(); - if (!GCNames) - GCNames = new DenseMap<const Function*,PooledStringPtr>(); - (*GCNames)[this] = GCNamePool->intern(Str); +void Function::setGC(const std::string Str) { + setValueSubclassDataBit(14, !Str.empty()); + getContext().setGC(*this, std::move(Str)); } void Function::clearGC() { - sys::SmartScopedWriter<true> Writer(*GCLock); - if (GCNames) { - GCNames->erase(this); - if (GCNames->empty()) { - delete GCNames; - GCNames = nullptr; - if (GCNamePool->empty()) { - delete GCNamePool; - GCNamePool = nullptr; - } - } - } + if (!hasGC()) + return; + getContext().deleteGC(*this); + setValueSubclassDataBit(14, false); } /// Copy all additional attributes (those not needed to create a Function) from diff --git a/contrib/llvm/lib/IR/IRPrintingPasses.cpp b/contrib/llvm/lib/IR/IRPrintingPasses.cpp index c1ac336..822dbeb 100644 --- a/contrib/llvm/lib/IR/IRPrintingPasses.cpp +++ b/contrib/llvm/lib/IR/IRPrintingPasses.cpp @@ -28,7 +28,13 @@ PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner, PreservedAnalyses PrintModulePass::run(Module &M) { OS << Banner; - M.print(OS, nullptr, ShouldPreserveUseListOrder); + if (llvm::isFunctionInPrintList("*")) + M.print(OS, nullptr, ShouldPreserveUseListOrder); + else { + for(const auto &F : M.functions()) + if 
(llvm::isFunctionInPrintList(F.getName())) + F.print(OS); + } return PreservedAnalyses::all(); } @@ -37,7 +43,8 @@ PrintFunctionPass::PrintFunctionPass(raw_ostream &OS, const std::string &Banner) : OS(OS), Banner(Banner) {} PreservedAnalyses PrintFunctionPass::run(Function &F) { - OS << Banner << static_cast<Value &>(F); + if (isFunctionInPrintList(F.getName())) + OS << Banner << static_cast<Value &>(F); return PreservedAnalyses::all(); } diff --git a/contrib/llvm/lib/IR/LLVMContext.cpp b/contrib/llvm/lib/IR/LLVMContext.cpp index 8848bcb..48b53b0 100644 --- a/contrib/llvm/lib/IR/LLVMContext.cpp +++ b/contrib/llvm/lib/IR/LLVMContext.cpp @@ -304,3 +304,19 @@ void LLVMContext::getOperandBundleTags(SmallVectorImpl<StringRef> &Tags) const { uint32_t LLVMContext::getOperandBundleTagID(StringRef Tag) const { return pImpl->getOperandBundleTagID(Tag); } + +void LLVMContext::setGC(const Function &Fn, std::string GCName) { + auto It = pImpl->GCNames.find(&Fn); + + if (It == pImpl->GCNames.end()) { + pImpl->GCNames.insert(std::make_pair(&Fn, std::move(GCName))); + return; + } + It->second = std::move(GCName); +} +const std::string &LLVMContext::getGC(const Function &Fn) { + return pImpl->GCNames[&Fn]; +} +void LLVMContext::deleteGC(const Function &Fn) { + pImpl->GCNames.erase(&Fn); +} diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm/lib/IR/LLVMContextImpl.h index a24114d..d42047d 100644 --- a/contrib/llvm/lib/IR/LLVMContextImpl.h +++ b/contrib/llvm/lib/IR/LLVMContextImpl.h @@ -1027,6 +1027,13 @@ public: void getOperandBundleTags(SmallVectorImpl<StringRef> &Tags) const; uint32_t getOperandBundleTagID(StringRef Tag) const; + /// Maintain the GC name for each function. + /// + /// This saves allocating an additional word in Function for programs which + /// do not use GC (i.e., most programs) at the cost of increased overhead for + /// clients which do use GC. + DenseMap<const Function*, std::string> GCNames; + LLVMContextImpl(LLVMContext &C); ~LLVMContextImpl(); diff --git a/contrib/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm/lib/IR/LegacyPassManager.cpp index f2e0c7d..63d89f2 100644 --- a/contrib/llvm/lib/IR/LegacyPassManager.cpp +++ b/contrib/llvm/lib/IR/LegacyPassManager.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <map> +#include <unordered_set> using namespace llvm; using namespace llvm::legacy; @@ -83,6 +84,13 @@ PrintAfterAll("print-after-all", llvm::cl::desc("Print IR after each pass"), cl::init(false)); +static cl::list<std::string> + PrintFuncsList("filter-print-funcs", cl::value_desc("function names"), + cl::desc("Only print IR for functions whose name " + "match this for all print-[before|after][-all] " + "options"), + cl::CommaSeparated); + /// This is a helper to determine whether to print IR before or /// after a pass. @@ -109,6 +117,11 @@ static bool ShouldPrintAfterPass(const PassInfo *PI) { return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter); } +bool llvm::isFunctionInPrintList(StringRef FunctionName) { + static std::unordered_set<std::string> PrintFuncNames(PrintFuncsList.begin(), + PrintFuncsList.end()); + return PrintFuncNames.empty() || PrintFuncNames.count(FunctionName); +} /// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions /// or higher is specified. 
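isFunctionInPrintList above has two properties worth noting: an empty filter matches everything, and the set is built once from the -filter-print-funcs list and then reused for every query. A standalone sketch of the same behavior; the comma splitting that cl::CommaSeparated performs is inlined here, and all names are ours:

    #include <sstream>
    #include <string>
    #include <unordered_set>

    // An empty filter prints every function; otherwise only the named ones.
    static bool inPrintList(const std::string &Name, const std::string &Filter) {
      // Built once on first use and latched, like the function-local static
      // set constructed from PrintFuncsList in the hunk above.
      static std::unordered_set<std::string> Names = [&] {
        std::unordered_set<std::string> S;
        std::stringstream SS(Filter);
        for (std::string Item; std::getline(SS, Item, ',');)
          if (!Item.empty())
            S.insert(Item);
        return S;
      }();
      return Names.empty() || Names.count(Name) != 0;
    }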
bool PMDataManager::isPassDebuggingExecutionsOrMore() const { diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp index d8eaceb..9a9a501 100644 --- a/contrib/llvm/lib/IR/Metadata.cpp +++ b/contrib/llvm/lib/IR/Metadata.cpp @@ -557,7 +557,7 @@ void MDNode::decrementUnresolvedOperandCount() { resolve(); } -void MDNode::resolveCycles(bool AllowTemps) { +void MDNode::resolveRecursivelyImpl(bool AllowTemps) { if (isResolved()) return; diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp index 6dfb05d..9198b0e 100644 --- a/contrib/llvm/lib/IR/Verifier.cpp +++ b/contrib/llvm/lib/IR/Verifier.cpp @@ -45,6 +45,7 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/Verifier.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -145,6 +146,11 @@ private: OS << *C; } + template <typename T> void Write(ArrayRef<T> Vs) { + for (const T &V : Vs) + Write(V); + } + template <typename T1, typename... Ts> void WriteTs(const T1 &V1, const Ts &... Vs) { Write(V1); @@ -204,6 +210,10 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport { /// given function and the largest index passed to llvm.localrecover. DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo; + // Maps catchswitches and cleanuppads that unwind to siblings to the + // terminators that indicate the unwind, used to detect cycles therein. + MapVector<Instruction *, TerminatorInst *> SiblingFuncletInfo; + /// Cache of constants visited in search of ConstantExprs. SmallPtrSet<const Constant *, 32> ConstantExprVisited; @@ -245,9 +255,11 @@ public: Broken = false; // FIXME: We strip const here because the inst visitor strips const. visit(const_cast<Function &>(F)); + verifySiblingFuncletUnwinds(); InstsInThisBlock.clear(); LandingPadResultTy = nullptr; SawFrameEscape = false; + SiblingFuncletInfo.clear(); return !Broken; } @@ -403,6 +415,7 @@ private: void visitCatchPadInst(CatchPadInst &CPI); void visitCatchReturnInst(CatchReturnInst &CatchReturn); void visitCleanupPadInst(CleanupPadInst &CPI); + void visitFuncletPadInst(FuncletPadInst &FPI); void visitCatchSwitchInst(CatchSwitchInst &CatchSwitch); void visitCleanupReturnInst(CleanupReturnInst &CRI); @@ -428,6 +441,7 @@ private: void visitConstantExpr(const ConstantExpr *CE); void VerifyStatepoint(ImmutableCallSite CS); void verifyFrameRecoverIndices(); + void verifySiblingFuncletUnwinds(); // Module-level debug info verification... 
void verifyTypeRefs(); @@ -984,6 +998,9 @@ void Verifier::visitDIMacro(const DIMacro &N) { N.getMacinfoType() == dwarf::DW_MACINFO_undef, "invalid macinfo type", &N); Assert(!N.getName().empty(), "anonymous macro", &N); + if (!N.getValue().empty()) { + assert(N.getValue().data()[0] != ' ' && "Macro value has a space prefix"); + } } void Verifier::visitDIMacroFile(const DIMacroFile &N) { @@ -1693,6 +1710,59 @@ void Verifier::verifyFrameRecoverIndices() { } } +static Instruction *getSuccPad(TerminatorInst *Terminator) { + BasicBlock *UnwindDest; + if (auto *II = dyn_cast<InvokeInst>(Terminator)) + UnwindDest = II->getUnwindDest(); + else if (auto *CSI = dyn_cast<CatchSwitchInst>(Terminator)) + UnwindDest = CSI->getUnwindDest(); + else + UnwindDest = cast<CleanupReturnInst>(Terminator)->getUnwindDest(); + return UnwindDest->getFirstNonPHI(); +} + +void Verifier::verifySiblingFuncletUnwinds() { + SmallPtrSet<Instruction *, 8> Visited; + SmallPtrSet<Instruction *, 8> Active; + for (const auto &Pair : SiblingFuncletInfo) { + Instruction *PredPad = Pair.first; + if (Visited.count(PredPad)) + continue; + Active.insert(PredPad); + TerminatorInst *Terminator = Pair.second; + do { + Instruction *SuccPad = getSuccPad(Terminator); + if (Active.count(SuccPad)) { + // Found a cycle; report error + Instruction *CyclePad = SuccPad; + SmallVector<Instruction *, 8> CycleNodes; + do { + CycleNodes.push_back(CyclePad); + TerminatorInst *CycleTerminator = SiblingFuncletInfo[CyclePad]; + if (CycleTerminator != CyclePad) + CycleNodes.push_back(CycleTerminator); + CyclePad = getSuccPad(CycleTerminator); + } while (CyclePad != SuccPad); + Assert(false, "EH pads can't handle each other's exceptions", + ArrayRef<Instruction *>(CycleNodes)); + } + // Don't re-walk a node we've already checked + if (!Visited.insert(SuccPad).second) + break; + // Walk to this successor if it has a map entry. + PredPad = SuccPad; + auto TermI = SiblingFuncletInfo.find(PredPad); + if (TermI == SiblingFuncletInfo.end()) + break; + Terminator = TermI->second; + Active.insert(PredPad); + } while (true); + // Each node only has one successor, so we've walked all the active + // nodes' successors. + Active.clear(); + } +} + // visitFunction - Verify that a function is ok. // void Verifier::visitFunction(const Function &F) { @@ -2892,6 +2962,13 @@ void Verifier::visitInsertValueInst(InsertValueInst &IVI) { visitInstruction(IVI); } +static Value *getParentPad(Value *EHPad) { + if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad)) + return FPI->getParentPad(); + + return cast<CatchSwitchInst>(EHPad)->getParentPad(); +} + void Verifier::visitEHPadPredecessors(Instruction &I) { assert(I.isEHPad()); @@ -2919,16 +2996,45 @@ void Verifier::visitEHPadPredecessors(Instruction &I) { "Block containing CatchPadInst must be jumped to " "only by its catchswitch.", CPI); + Assert(BB != CPI->getCatchSwitch()->getUnwindDest(), + "Catchswitch cannot unwind to one of its catchpads", + CPI->getCatchSwitch(), CPI); return; } + // Verify that each pred has a legal terminator with a legal to/from EH + // pad relationship.
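Because every pad in SiblingFuncletInfo has exactly one unwind successor, the cycle check in verifySiblingFuncletUnwinds above is just successor-chain chasing: an Active set holds the current chain, and a Visited set keeps any node from being walked twice, so the whole pass is linear. A standalone sketch on integer pad ids (a std::map stands in for the MapVector; all names ours), returning one cycle or an empty vector:

    #include <map>
    #include <set>
    #include <vector>

    static std::vector<int> findUnwindCycle(const std::map<int, int> &Succ) {
      std::set<int> Visited;
      for (const auto &Entry : Succ) {
        int Node = Entry.first;
        if (Visited.count(Node))
          continue;
        std::set<int> Active; // pads on the chain we are currently walking
        while (true) {
          if (Active.count(Node)) { // the chain bit its own tail: collect it
            std::vector<int> Cycle{Node};
            for (int N = Succ.at(Node); N != Node; N = Succ.at(N))
              Cycle.push_back(N);
            return Cycle;
          }
          if (!Visited.insert(Node).second)
            break; // ran into a chain already proven cycle-free
          Active.insert(Node);
          auto It = Succ.find(Node);
          if (It == Succ.end())
            break; // chain leaves the map: no cycle on this walk
          Node = It->second;
        }
      }
      return {}; // acyclic
    }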
+ Instruction *ToPad = &I; + Value *ToPadParent = getParentPad(ToPad); for (BasicBlock *PredBB : predecessors(BB)) { TerminatorInst *TI = PredBB->getTerminator(); + Value *FromPad; if (auto *II = dyn_cast<InvokeInst>(TI)) { Assert(II->getUnwindDest() == BB && II->getNormalDest() != BB, - "EH pad must be jumped to via an unwind edge", &I, II); - } else if (!isa<CleanupReturnInst>(TI) && !isa<CatchSwitchInst>(TI)) { - Assert(false, "EH pad must be jumped to via an unwind edge", &I, TI); + "EH pad must be jumped to via an unwind edge", ToPad, II); + if (auto Bundle = II->getOperandBundle(LLVMContext::OB_funclet)) + FromPad = Bundle->Inputs[0]; + else + FromPad = ConstantTokenNone::get(II->getContext()); + } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) { + FromPad = CRI->getCleanupPad(); + Assert(FromPad != ToPadParent, "A cleanupret must exit its cleanup", CRI); + } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) { + FromPad = CSI; + } else { + Assert(false, "EH pad must be jumped to via an unwind edge", ToPad, TI); + } + + // The edge may exit from zero or more nested pads. + for (;; FromPad = getParentPad(FromPad)) { + Assert(FromPad != ToPad, + "EH pad cannot handle exceptions raised within it", FromPad, TI); + if (FromPad == ToPadParent) { + // This is a legal unwind edge. + break; + } + Assert(!isa<ConstantTokenNone>(FromPad), + "A single unwind edge may only enter one EH pad", TI); } } } @@ -2992,7 +3098,7 @@ void Verifier::visitCatchPadInst(CatchPadInst &CPI) { Assert(BB->getFirstNonPHI() == &CPI, "CatchPadInst not the first non-PHI instruction in the block.", &CPI); - visitInstruction(CPI); + visitFuncletPadInst(CPI); } void Verifier::visitCatchReturnInst(CatchReturnInst &CatchReturn) { @@ -3022,33 +3128,160 @@ void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) { Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad), "CleanupPadInst has an invalid parent.", &CPI); + visitFuncletPadInst(CPI); +} + +void Verifier::visitFuncletPadInst(FuncletPadInst &FPI) { User *FirstUser = nullptr; - BasicBlock *FirstUnwindDest = nullptr; - for (User *U : CPI.users()) { - BasicBlock *UnwindDest; - if (CleanupReturnInst *CRI = dyn_cast<CleanupReturnInst>(U)) { - UnwindDest = CRI->getUnwindDest(); - } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) { - continue; - } else if (CallSite(U)) { - continue; - } else { - Assert(false, "bogus cleanuppad use", &CPI); + Value *FirstUnwindPad = nullptr; + SmallVector<FuncletPadInst *, 8> Worklist({&FPI}); + while (!Worklist.empty()) { + FuncletPadInst *CurrentPad = Worklist.pop_back_val(); + Value *UnresolvedAncestorPad = nullptr; + for (User *U : CurrentPad->users()) { + BasicBlock *UnwindDest; + if (auto *CRI = dyn_cast<CleanupReturnInst>(U)) { + UnwindDest = CRI->getUnwindDest(); + } else if (auto *CSI = dyn_cast<CatchSwitchInst>(U)) { + // We allow catchswitch unwind to caller to nest + // within an outer pad that unwinds somewhere else, + // because catchswitch doesn't have a nounwind variant. + // See e.g. SimplifyCFGOpt::SimplifyUnreachable. + if (CSI->unwindsToCaller()) + continue; + UnwindDest = CSI->getUnwindDest(); + } else if (auto *II = dyn_cast<InvokeInst>(U)) { + UnwindDest = II->getUnwindDest(); + } else if (isa<CallInst>(U)) { + // Calls which don't unwind may be found inside funclet + // pads that unwind somewhere else. We don't *require* + // such calls to be annotated nounwind. 
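The parent-pad walk at the end of the hunk above is the core legality rule: an unwind edge from FromPad into ToPad is legal exactly when walking FromPad's parent chain reaches ToPad's parent before it reaches ToPad itself or falls off the top of the chain. A toy model with pads as indices and -1 standing in for ConstantTokenNone, not the IR types; the checks run in the same order as the verifier's loop:

    #include <vector>

    // Parent[P] is P's parent pad; -1 plays the role of the "none" token.
    static bool isLegalUnwindEdge(int FromPad, int ToPad,
                                  const std::vector<int> &Parent) {
      int ToPadParent = Parent[ToPad];
      for (int P = FromPad;; P = Parent[P]) {
        if (P == ToPad)
          return false; // an EH pad cannot handle exceptions raised within it
        if (P == ToPadParent)
          return true; // exits its nested pads and enters exactly one pad
        if (P == -1)
          return false; // unwound past every pad without reaching ToPad's scope
      }
    }

Note that when ToPad sits at the top level (ToPadParent == -1), the middle check fires first, which is the "single unwind edge may only enter one EH pad" case the verifier asserts on.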
+ continue; + } else if (auto *CPI = dyn_cast<CleanupPadInst>(U)) { + // The unwind dest for a cleanup can only be found by + // recursive search. Add it to the worklist, and we'll + // search for its first use that determines where it unwinds. + Worklist.push_back(CPI); + continue; + } else { + Assert(isa<CatchReturnInst>(U), "Bogus funclet pad use", U); + continue; + } + + Value *UnwindPad; + bool ExitsFPI; + if (UnwindDest) { + UnwindPad = UnwindDest->getFirstNonPHI(); + Value *UnwindParent = getParentPad(UnwindPad); + // Ignore unwind edges that don't exit CurrentPad. + if (UnwindParent == CurrentPad) + continue; + // Determine whether the original funclet pad is exited, + // and if we are scanning nested pads determine how many + // of them are exited so we can stop searching their + // children. + Value *ExitedPad = CurrentPad; + ExitsFPI = false; + do { + if (ExitedPad == &FPI) { + ExitsFPI = true; + // Now we can resolve any ancestors of CurrentPad up to + // FPI, but not including FPI since we need to make sure + // to check all direct users of FPI for consistency. + UnresolvedAncestorPad = &FPI; + break; + } + Value *ExitedParent = getParentPad(ExitedPad); + if (ExitedParent == UnwindParent) { + // ExitedPad is the ancestor-most pad which this unwind + // edge exits, so we can resolve up to it, meaning that + // ExitedParent is the first ancestor still unresolved. + UnresolvedAncestorPad = ExitedParent; + break; + } + ExitedPad = ExitedParent; + } while (!isa<ConstantTokenNone>(ExitedPad)); + } else { + // Unwinding to caller exits all pads. + UnwindPad = ConstantTokenNone::get(FPI.getContext()); + ExitsFPI = true; + UnresolvedAncestorPad = &FPI; + } + + if (ExitsFPI) { + // This unwind edge exits FPI. Make sure it agrees with other + // such edges. + if (FirstUser) { + Assert(UnwindPad == FirstUnwindPad, "Unwind edges out of a funclet " + "pad must have the same unwind " + "dest", + &FPI, U, FirstUser); + } else { + FirstUser = U; + FirstUnwindPad = UnwindPad; + // Record cleanup sibling unwinds for verifySiblingFuncletUnwinds + if (isa<CleanupPadInst>(&FPI) && !isa<ConstantTokenNone>(UnwindPad) && + getParentPad(UnwindPad) == getParentPad(&FPI)) + SiblingFuncletInfo[&FPI] = cast<TerminatorInst>(U); + } + } + // Make sure we visit all uses of FPI, but for nested pads stop as + // soon as we know where they unwind to. + if (CurrentPad != &FPI) + break; } + if (UnresolvedAncestorPad) { + if (CurrentPad == UnresolvedAncestorPad) { + // When CurrentPad is FPI itself, we don't mark it as resolved even if + // we've found an unwind edge that exits it, because we need to verify + // all direct uses of FPI. + assert(CurrentPad == &FPI); + continue; + } + // Pop off the worklist any nested pads that we've found an unwind + // destination for. The pads on the worklist are the uncles, + // great-uncles, etc. of CurrentPad. We've found an unwind destination + // for all ancestors of CurrentPad up to but not including + // UnresolvedAncestorPad. + Value *ResolvedPad = CurrentPad; + while (!Worklist.empty()) { + Value *UnclePad = Worklist.back(); + Value *AncestorPad = getParentPad(UnclePad); + // Walk ResolvedPad up the ancestor list until we either find the + // uncle's parent or the last resolved ancestor. 
+ while (ResolvedPad != AncestorPad) { + Value *ResolvedParent = getParentPad(ResolvedPad); + if (ResolvedParent == UnresolvedAncestorPad) { + break; + } + ResolvedPad = ResolvedParent; + } + // If the resolved ancestor search didn't find the uncle's parent, + // then the uncle is not yet resolved. + if (ResolvedPad != AncestorPad) + break; + // This uncle is resolved, so pop it from the worklist. + Worklist.pop_back(); + } + } + } - if (!FirstUser) { - FirstUser = U; - FirstUnwindDest = UnwindDest; - } else { - Assert( - UnwindDest == FirstUnwindDest, - "cleanupret instructions from the same cleanuppad must have the same " - "unwind destination", - FirstUser, U); + if (FirstUnwindPad) { + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FPI.getParentPad())) { + BasicBlock *SwitchUnwindDest = CatchSwitch->getUnwindDest(); + Value *SwitchUnwindPad; + if (SwitchUnwindDest) + SwitchUnwindPad = SwitchUnwindDest->getFirstNonPHI(); + else + SwitchUnwindPad = ConstantTokenNone::get(FPI.getContext()); + Assert(SwitchUnwindPad == FirstUnwindPad, + "Unwind edges out of a catch must have the same unwind dest as " + "the parent catchswitch", + &FPI, FirstUser, CatchSwitch); } } - visitInstruction(CPI); + visitInstruction(FPI); } void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) { @@ -3067,17 +3300,21 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) { "CatchSwitchInst not the first non-PHI instruction in the block.", &CatchSwitch); + auto *ParentPad = CatchSwitch.getParentPad(); + Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad), + "CatchSwitchInst has an invalid parent.", ParentPad); + if (BasicBlock *UnwindDest = CatchSwitch.getUnwindDest()) { Instruction *I = UnwindDest->getFirstNonPHI(); Assert(I->isEHPad() && !isa<LandingPadInst>(I), "CatchSwitchInst must unwind to an EH block which is not a " "landingpad.", &CatchSwitch); - } - auto *ParentPad = CatchSwitch.getParentPad(); - Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad), - "CatchSwitchInst has an invalid parent.", ParentPad); + // Record catchswitch sibling unwinds for verifySiblingFuncletUnwinds + if (getParentPad(I) == ParentPad) + SiblingFuncletInfo[&CatchSwitch] = &CatchSwitch; + } Assert(CatchSwitch.getNumHandlers() != 0, "CatchSwitchInst cannot have empty handler list", &CatchSwitch); @@ -3652,6 +3889,9 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { case Intrinsic::experimental_gc_relocate: { Assert(CS.getNumArgOperands() == 3, "wrong number of arguments", CS); + Assert(isa<PointerType>(CS.getType()->getScalarType()), + "gc.relocate must return a pointer or a vector of pointers", CS); + // Check that this relocate is correctly tied to the statepoint // This is case for relocate on the unwinding path of an invoke statepoint @@ -3734,17 +3974,20 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { "'gc parameters' section of the statepoint call", CS); - // Relocated value must be a pointer type, but gc_relocate does not need to return the - // same pointer type as the relocated pointer. It can be casted to the correct type later - // if it's desired. However, they must have the same address space. + // Relocated value must be either a pointer type or vector-of-pointer type, + // but gc_relocate does not need to return the same pointer type as the + // relocated pointer. It can be casted to the correct type later if it's + // desired. 
However, they must have the same address space and 'vectorness' GCRelocateInst &Relocate = cast<GCRelocateInst>(*CS.getInstruction()); - Assert(Relocate.getDerivedPtr()->getType()->isPointerTy(), + Assert(Relocate.getDerivedPtr()->getType()->getScalarType()->isPointerTy(), "gc.relocate: relocated value must be a gc pointer", CS); - // gc_relocate return type must be a pointer type, and is verified earlier in - // VerifyIntrinsicType(). - Assert(cast<PointerType>(CS.getType())->getAddressSpace() == - cast<PointerType>(Relocate.getDerivedPtr()->getType())->getAddressSpace(), + auto ResultType = CS.getType(); + auto DerivedType = Relocate.getDerivedPtr()->getType(); + Assert(ResultType->isVectorTy() == DerivedType->isVectorTy(), + "gc.relocate: vector relocates to vector and pointer to pointer", CS); + Assert(ResultType->getPointerAddressSpace() == + DerivedType->getPointerAddressSpace(), "gc.relocate: relocating a pointer shouldn't change its address space", CS); break; } diff --git a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp index 6baaaa4..66df23b 100644 --- a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -92,7 +92,8 @@ void LTOCodeGenerator::initializeLTOPasses() { initializeSROALegacyPassPass(R); initializeSROA_DTPass(R); initializeSROA_SSAUpPass(R); - initializeFunctionAttrsPass(R); + initializePostOrderFunctionAttrsPass(R); + initializeReversePostOrderFunctionAttrsPass(R); initializeGlobalsAAWrapperPassPass(R); initializeLICMPass(R); initializeMergedLoadStoreMotionPass(R); diff --git a/contrib/llvm/lib/Linker/IRMover.cpp b/contrib/llvm/lib/Linker/IRMover.cpp index 309690f..8dd59f9 100644 --- a/contrib/llvm/lib/Linker/IRMover.cpp +++ b/contrib/llvm/lib/Linker/IRMover.cpp @@ -773,6 +773,16 @@ GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV, NewGV->setLinkage(GlobalValue::ExternalWeakLinkage); NewGV->copyAttributesFrom(SGV); + + // Remove these copied constants in case this stays a declaration, since + // they point to the source module. If the def is linked the values will + // be mapped in during linkFunctionBody. + if (auto *NewF = dyn_cast<Function>(NewGV)) { + NewF->setPersonalityFn(nullptr); + NewF->setPrefixData(nullptr); + NewF->setPrologueData(nullptr); + } + return NewGV; } @@ -1211,6 +1221,18 @@ void IRLinker::findNeededSubprograms(ValueToValueMapTy &ValueMap) { for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) { auto *CU = cast<DICompileUnit>(CompileUnits->getOperand(I)); assert(CU && "Expected valid compile unit"); + // Ensure that we don't remove subprograms referenced by DIImportedEntity. + // It is not legal to have a DIImportedEntity with a null entity or scope. + // FIXME: The DISubprogram for functions not linked in but kept due to + // being referenced by a DIImportedEntity should also have their + // IsDefinition flag unset. + SmallPtrSet<DISubprogram *, 8> ImportedEntitySPs; + for (auto *IE : CU->getImportedEntities()) { + if (auto *SP = dyn_cast<DISubprogram>(IE->getEntity())) + ImportedEntitySPs.insert(SP); + if (auto *SP = dyn_cast<DISubprogram>(IE->getScope())) + ImportedEntitySPs.insert(SP); + } for (auto *Op : CU->getSubprograms()) { // Unless we were doing function importing and deferred metadata linking, // any needed SPs should have been mapped as they would be reached @@ -1218,7 +1240,7 @@ void IRLinker::findNeededSubprograms(ValueToValueMapTy &ValueMap) { // function bodies, or from DILocation on inlined instructions).
assert(!(ValueMap.MD()[Op] && IsMetadataLinkingPostpass) && "DISubprogram shouldn't be mapped yet"); - if (!ValueMap.MD()[Op]) + if (!ValueMap.MD()[Op] && !ImportedEntitySPs.count(Op)) UnneededSubprograms.insert(Op); } } diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp index 9de3be4..6ffa71e 100644 --- a/contrib/llvm/lib/Linker/LinkModules.cpp +++ b/contrib/llvm/lib/Linker/LinkModules.cpp @@ -65,9 +65,6 @@ class ModuleLinker { return Flags & Linker::InternalizeLinkedSymbols; } - /// Check if we should promote the given local value to global scope. - bool doPromoteLocalToGlobal(const GlobalValue *SGV); - bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest, const GlobalValue &Src); @@ -97,11 +94,11 @@ class ModuleLinker { Module &DstM = Mover.getModule(); // If the source has no name it can't link. If it has local linkage, // there is no name match-up going on. - if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(getLinkage(SrcGV))) + if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage())) return nullptr; // Otherwise see if we have a match in the destination module's symtab. - GlobalValue *DGV = DstM.getNamedValue(getName(SrcGV)); + GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName()); if (!DGV) return nullptr; @@ -116,6 +113,64 @@ class ModuleLinker { bool linkIfNeeded(GlobalValue &GV); + /// Helper method to check if we are importing from the current source + /// module. + bool isPerformingImport() const { return FunctionsToImport != nullptr; } + + /// If we are importing from the source module, checks if we should + /// import SGV as a definition, otherwise import as a declaration. + bool doImportAsDefinition(const GlobalValue *SGV); + +public: + ModuleLinker(IRMover &Mover, Module &SrcM, unsigned Flags, + const FunctionInfoIndex *Index = nullptr, + DenseSet<const GlobalValue *> *FunctionsToImport = nullptr, + DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr) + : Mover(Mover), SrcM(SrcM), Flags(Flags), ImportIndex(Index), + FunctionsToImport(FunctionsToImport), + ValIDToTempMDMap(ValIDToTempMDMap) { + assert((ImportIndex || !FunctionsToImport) && + "Expect a FunctionInfoIndex when importing"); + // If we have a FunctionInfoIndex but no function to import, + // then this is the primary module being compiled in a ThinLTO + // backend compilation, and we need to see if it has functions that + // may be exported to another backend compilation. + if (ImportIndex && !FunctionsToImport) + HasExportedFunctions = ImportIndex->hasExportedFunctions(SrcM); + assert((ValIDToTempMDMap || !FunctionsToImport) && + "Function importing must provide a ValIDToTempMDMap"); + } + + bool run(); +}; + +/// Class to handle necessary GlobalValue changes required by ThinLTO including +/// linkage changes and any necessary renaming. +class ThinLTOGlobalProcessing { + /// The Module which we are exporting or importing functions from. + Module &M; + + /// Function index passed in for function importing/exporting handling. + const FunctionInfoIndex *ImportIndex; + + /// Functions to import from this module, all other functions will be + /// imported as declarations instead of definitions. + DenseSet<const GlobalValue *> *FunctionsToImport; + + /// Set to true if the given FunctionInfoIndex contains any functions + /// from this source module, in which case we must conservatively assume + /// that any of its functions may be imported into another module + /// as part of a different backend compilation process. 
+ bool HasExportedFunctions = false; + + /// Populated during ThinLTO global processing with locals promoted + /// to global scope in an exporting module, which now need to be linked + /// in if calling from the ModuleLinker. + SetVector<GlobalValue *> NewExportedValues; + + /// Check if we should promote the given local value to global scope. + bool doPromoteLocalToGlobal(const GlobalValue *SGV); + /// Helper methods to check if we are importing from or potentially /// exporting from the current source module. bool isPerformingImport() const { return FunctionsToImport != nullptr; } @@ -143,32 +198,30 @@ class ModuleLinker { GlobalValue::LinkageTypes getLinkage(const GlobalValue *SGV); public: - ModuleLinker(IRMover &Mover, Module &SrcM, unsigned Flags, - const FunctionInfoIndex *Index = nullptr, - DenseSet<const GlobalValue *> *FunctionsToImport = nullptr, - DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr) - : Mover(Mover), SrcM(SrcM), Flags(Flags), ImportIndex(Index), - FunctionsToImport(FunctionsToImport), - ValIDToTempMDMap(ValIDToTempMDMap) { - assert((ImportIndex || !FunctionsToImport) && - "Expect a FunctionInfoIndex when importing"); + ThinLTOGlobalProcessing( + Module &M, const FunctionInfoIndex *Index, + DenseSet<const GlobalValue *> *FunctionsToImport = nullptr) + : M(M), ImportIndex(Index), FunctionsToImport(FunctionsToImport) { // If we have a FunctionInfoIndex but no function to import, // then this is the primary module being compiled in a ThinLTO // backend compilation, and we need to see if it has functions that // may be exported to another backend compilation. - if (ImportIndex && !FunctionsToImport) - HasExportedFunctions = ImportIndex->hasExportedFunctions(SrcM); - assert((ValIDToTempMDMap || !FunctionsToImport) && - "Function importing must provide a ValIDToTempMDMap"); + if (!FunctionsToImport) + HasExportedFunctions = ImportIndex->hasExportedFunctions(M); } bool run(); + + /// Access the promoted globals that are now exported and need to be linked. + SetVector<GlobalValue *> &getNewExportedValues() { return NewExportedValues; } }; } -bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) { - if (!isPerformingImport()) - return false; +/// Checks if we should import SGV as a definition, otherwise import as a +/// declaration. 
+static bool +doImportAsDefinitionImpl(const GlobalValue *SGV, + DenseSet<const GlobalValue *> *FunctionsToImport) { auto *GA = dyn_cast<GlobalAlias>(SGV); if (GA) { if (GA->hasWeakAnyLinkage()) @@ -176,7 +229,7 @@ bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) { const GlobalObject *GO = GA->getBaseObject(); if (!GO->hasLinkOnceODRLinkage()) return false; - return doImportAsDefinition(GO); + return doImportAsDefinitionImpl(GO, FunctionsToImport); } // Always import GlobalVariable definitions, except for the special // case of WeakAny which are imported as ExternalWeak declarations @@ -196,7 +249,19 @@ bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) { return false; } -bool ModuleLinker::doPromoteLocalToGlobal(const GlobalValue *SGV) { +bool ThinLTOGlobalProcessing::doImportAsDefinition(const GlobalValue *SGV) { + if (!isPerformingImport()) + return false; + return doImportAsDefinitionImpl(SGV, FunctionsToImport); +} + +bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) { + if (!isPerformingImport()) + return false; + return doImportAsDefinitionImpl(SGV, FunctionsToImport); +} + +bool ThinLTOGlobalProcessing::doPromoteLocalToGlobal(const GlobalValue *SGV) { assert(SGV->hasLocalLinkage()); // Both the imported references and the original local variable must // be promoted. @@ -220,7 +285,7 @@ bool ModuleLinker::doPromoteLocalToGlobal(const GlobalValue *SGV) { return true; } -std::string ModuleLinker::getName(const GlobalValue *SGV) { +std::string ThinLTOGlobalProcessing::getName(const GlobalValue *SGV) { // For locals that must be promoted to global scope, ensure that // the promoted name uniquely identifies the copy in the original module, // using the ID assigned during combined index creation. When importing, @@ -234,7 +299,8 @@ std::string ModuleLinker::getName(const GlobalValue *SGV) { return SGV->getName(); } -GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) { +GlobalValue::LinkageTypes +ThinLTOGlobalProcessing::getLinkage(const GlobalValue *SGV) { // Any local variable that is referenced by an exported function needs // to be promoted to global scope. Since we don't currently know which // functions reference which local variables/functions, we must treat @@ -298,8 +364,7 @@ GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) { // since it would cause global constructors/destructors to be // executed multiple times. This should have already been handled // by linkIfNeeded, and we will assert in shouldLinkFromSource - // if we try to import, so we simply return AppendingLinkage here - // as this helper is called more widely in getLinkedToGlobal. + // if we try to import, so we simply return AppendingLinkage. 
return GlobalValue::AppendingLinkage; case GlobalValue::InternalLinkage: @@ -652,7 +717,7 @@ void ModuleLinker::addLazyFor(GlobalValue &GV, IRMover::ValueAdder Add) { } } -void ModuleLinker::processGlobalForThinLTO(GlobalValue &GV) { +void ThinLTOGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { if (GV.hasLocalLinkage() && (doPromoteLocalToGlobal(&GV) || isPerformingImport())) { GV.setName(getName(&GV)); @@ -660,21 +725,26 @@ void ModuleLinker::processGlobalForThinLTO(GlobalValue &GV) { if (!GV.hasLocalLinkage()) GV.setVisibility(GlobalValue::HiddenVisibility); if (isModuleExporting()) - ValuesToLink.insert(&GV); + NewExportedValues.insert(&GV); return; } GV.setLinkage(getLinkage(&GV)); } -void ModuleLinker::processGlobalsForThinLTO() { - for (GlobalVariable &GV : SrcM.globals()) +void ThinLTOGlobalProcessing::processGlobalsForThinLTO() { + for (GlobalVariable &GV : M.globals()) processGlobalForThinLTO(GV); - for (Function &SF : SrcM) + for (Function &SF : M) processGlobalForThinLTO(SF); - for (GlobalAlias &GA : SrcM.aliases()) + for (GlobalAlias &GA : M.aliases()) processGlobalForThinLTO(GA); } +bool ThinLTOGlobalProcessing::run() { + processGlobalsForThinLTO(); + return false; +} + bool ModuleLinker::run() { for (const auto &SMEC : SrcM.getComdatSymbolTable()) { const Comdat &C = SMEC.getValue(); @@ -713,7 +783,14 @@ bool ModuleLinker::run() { if (linkIfNeeded(GA)) return true; - processGlobalsForThinLTO(); + if (ImportIndex) { + ThinLTOGlobalProcessing ThinLTOProcessing(SrcM, ImportIndex, + FunctionsToImport); + if (ThinLTOProcessing.run()) + return true; + for (auto *GV : ThinLTOProcessing.getNewExportedValues()) + ValuesToLink.insert(GV); + } for (unsigned I = 0; I < ValuesToLink.size(); ++I) { GlobalValue *GV = ValuesToLink[I]; @@ -786,15 +863,9 @@ bool Linker::linkModules(Module &Dest, std::unique_ptr<Module> Src, return L.linkInModule(std::move(Src), Flags); } -std::unique_ptr<Module> -llvm::renameModuleForThinLTO(std::unique_ptr<Module> M, - const FunctionInfoIndex *Index) { - std::unique_ptr<llvm::Module> RenamedModule( - new llvm::Module(M->getModuleIdentifier(), M->getContext())); - Linker L(*RenamedModule.get()); - if (L.linkInModule(std::move(M), llvm::Linker::Flags::None, Index)) - return nullptr; - return RenamedModule; +bool llvm::renameModuleForThinLTO(Module &M, const FunctionInfoIndex *Index) { + ThinLTOGlobalProcessing ThinLTOProcessing(M, Index); + return ThinLTOProcessing.run(); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp index 0f26b38..748644b 100644 --- a/contrib/llvm/lib/MC/MCExpr.cpp +++ b/contrib/llvm/lib/MC/MCExpr.cpp @@ -300,6 +300,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_Hexagon_LD_PLT: return "LDPLT"; case VK_Hexagon_IE: return "IE"; case VK_Hexagon_IE_GOT: return "IEGOT"; + case VK_WebAssembly_FUNCTION: return "FUNCTION"; case VK_TPREL: return "tprel"; case VK_DTPREL: return "dtprel"; } diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp index 34f49ca..f86f7e4 100644 --- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp @@ -256,6 +256,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) { DwarfRangesSection = Ctx->getMachOSection("__DWARF", "__debug_ranges", MachO::S_ATTR_DEBUG, SectionKind::getMetadata(), "debug_range"); + DwarfMacinfoSection = + Ctx->getMachOSection("__DWARF", "__debug_macinfo", 
MachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfDebugInlineSection = Ctx->getMachOSection("__DWARF", "__debug_inlined", MachO::S_ATTR_DEBUG, SectionKind::getMetadata()); @@ -505,6 +508,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) { Ctx->getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0); DwarfRangesSection = Ctx->getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0, "debug_range"); + DwarfMacinfoSection = + Ctx->getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0); // DWARF5 Experimental Debug Info @@ -684,6 +689,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) { COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata(), "debug_range"); + DwarfMacinfoSection = Ctx->getCOFFSection( + ".debug_macinfo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfInfoDWOSection = Ctx->getCOFFSection( ".debug_info.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp index d0a7daf..972610a 100644 --- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp +++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp @@ -64,8 +64,6 @@ void MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, return; } - assert(Hi->getOffset() >= Lo->getOffset() && - "Expected Hi to be greater than Lo"); EmitIntValue(Hi->getOffset() - Lo->getOffset(), Size); } diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp index a382090..a76cbdb 100644 --- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -969,9 +969,6 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, Header.PointerToSymbolTable = offset; - // FIXME: Remove the #else branch and make the #if branch unconditional once - // LLVM's self host configuration is aware of /Brepro. -#if (ENABLE_TIMESTAMPS == 1) // MS LINK expects to be able to use this timestamp to implement their // /INCREMENTAL feature. if (Asm.isIncrementalLinkerCompatible()) { @@ -980,12 +977,9 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, Now = UINT32_MAX; Header.TimeDateStamp = Now; } else { + // Have deterministic output if /INCREMENTAL isn't needed. Also matches GNU. Header.TimeDateStamp = 0; } -#else - // We want a deterministic output. It looks like GNU as also writes 0 in here. - Header.TimeDateStamp = 0; -#endif // Write it all to disk... 
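The WinCOFFObjectWriter hunk above makes the timestamp policy unconditional. Restated as a freestanding helper (the field handling mirrors the hunk; the function name is invented):

    #include <cstdint>
    #include <ctime>

    // MS LINK's /INCREMENTAL needs a real timestamp; otherwise write 0 so the
    // object file is deterministic (GNU as writes 0 here as well).
    uint32_t coffTimeDateStamp(bool IncrementalLinkerCompatible) {
      if (!IncrementalLinkerCompatible)
        return 0;
      std::time_t Now = std::time(nullptr);
      if (Now < 0 || static_cast<uint64_t>(Now) > UINT32_MAX)
        return UINT32_MAX; // out-of-range clock: fall back to a sentinel
      return static_cast<uint32_t>(Now);
    }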
WriteFileHeader(Header); diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp index 1f21117..4cd6aff 100644 --- a/contrib/llvm/lib/Object/COFFObjectFile.cpp +++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp @@ -1336,6 +1336,30 @@ ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const { return std::error_code(); } +std::error_code ExportDirectoryEntryRef::isForwarder(bool &Result) const { + const data_directory *DataEntry; + if (auto EC = OwningObject->getDataDirectory(COFF::EXPORT_TABLE, DataEntry)) + return EC; + uint32_t RVA; + if (auto EC = getExportRVA(RVA)) + return EC; + uint32_t Begin = DataEntry->RelativeVirtualAddress; + uint32_t End = DataEntry->RelativeVirtualAddress + DataEntry->Size; + Result = (Begin <= RVA && RVA < End); + return std::error_code(); +} + +std::error_code ExportDirectoryEntryRef::getForwardTo(StringRef &Result) const { + uint32_t RVA; + if (auto EC = getExportRVA(RVA)) + return EC; + uintptr_t IntPtr = 0; + if (auto EC = OwningObject->getRvaPtr(RVA, IntPtr)) + return EC; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return std::error_code(); +} + bool ImportedSymbolRef:: operator==(const ImportedSymbolRef &Other) const { return Entry32 == Other.Entry32 && Entry64 == Other.Entry64 diff --git a/contrib/llvm/lib/Object/ELF.cpp b/contrib/llvm/lib/Object/ELF.cpp index 62c27cc..12b772d 100644 --- a/contrib/llvm/lib/Object/ELF.cpp +++ b/contrib/llvm/lib/Object/ELF.cpp @@ -91,6 +91,13 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { break; } break; + case ELF::EM_WEBASSEMBLY: + switch (Type) { +#include "llvm/Support/ELFRelocs/WebAssembly.def" + default: + break; + } + break; default: break; } diff --git a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp index 55c0fb4..f5d477b 100644 --- a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp +++ b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp @@ -517,6 +517,6 @@ class CoverageMappingErrorCategoryType : public std::error_category { static ManagedStatic<CoverageMappingErrorCategoryType> ErrorCategory; -const std::error_category &llvm::coveragemap_category() { +const std::error_category &llvm::coverage::coveragemap_category() { return *ErrorCategory; } diff --git a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp index 32c692d..89e1cf4 100644 --- a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp +++ b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp @@ -319,21 +319,14 @@ static std::error_code readCoverageMappingData( if (Buf + sizeof(CovMapHeader) > End) return coveragemap_error::malformed; auto CovHeader = reinterpret_cast<const coverage::CovMapHeader *>(Buf); - uint32_t NRecords = - endian::byte_swap<uint32_t, Endian>(CovHeader->NRecords); - uint32_t FilenamesSize = - endian::byte_swap<uint32_t, Endian>(CovHeader->FilenamesSize); - uint32_t CoverageSize = - endian::byte_swap<uint32_t, Endian>(CovHeader->CoverageSize); - uint32_t Version = endian::byte_swap<uint32_t, Endian>(CovHeader->Version); + uint32_t NRecords = CovHeader->getNRecords<Endian>(); + uint32_t FilenamesSize = CovHeader->getFilenamesSize<Endian>(); + uint32_t CoverageSize = CovHeader->getCoverageSize<Endian>(); + uint32_t Version = CovHeader->getVersion<Endian>(); Buf = reinterpret_cast<const char *>(++CovHeader); - switch (Version) { - case CoverageMappingVersion1: - break; - default: + if (Version > 
coverage::CoverageMappingCurrentVersion) return coveragemap_error::unsupported_version; - } // Skip past the function records, saving the start and end for later. const char *FunBuf = Buf; @@ -364,11 +357,8 @@ static std::error_code readCoverageMappingData( reinterpret_cast<const coverage::CovMapFunctionRecord<T> *>(FunBuf); while ((const char *)CFR < FunEnd) { // Read the function information - T NamePtr = endian::byte_swap<T, Endian>(CFR->NamePtr); - uint32_t NameSize = endian::byte_swap<uint32_t, Endian>(CFR->NameSize); - uint32_t DataSize = endian::byte_swap<uint32_t, Endian>(CFR->DataSize); - uint64_t FuncHash = endian::byte_swap<uint64_t, Endian>(CFR->FuncHash); - CFR++; + uint32_t DataSize = CFR->template getDataSize<Endian>(); + uint64_t FuncHash = CFR->template getFuncHash<Endian>(); // Now use that to read the coverage data. if (CovBuf + DataSize > CovEnd) @@ -379,16 +369,18 @@ static std::error_code readCoverageMappingData( // Ignore this record if we already have a record that points to the same // function name. This is useful to ignore the redundant records for the // functions with ODR linkage. - if (!UniqueFunctionMappingData.insert(NamePtr).second) + T NameRef = CFR->template getFuncNameRef<Endian>(); + if (!UniqueFunctionMappingData.insert(NameRef).second) continue; - // Finally, grab the name and create a record. - StringRef FuncName = ProfileNames.getFuncName(NamePtr, NameSize); - if (NameSize && FuncName.empty()) - return coveragemap_error::malformed; + StringRef FuncName; + if (std::error_code EC = + CFR->template getFuncName<Endian>(ProfileNames, FuncName)) + return EC; Records.push_back(BinaryCoverageReader::ProfileMappingRecord( CoverageMappingVersion(Version), FuncName, FuncHash, Mapping, FilenamesBegin, Filenames.size() - FilenamesBegin)); + CFR++; } } diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp index 027f0f7..d677763 100644 --- a/contrib/llvm/lib/ProfileData/InstrProf.cpp +++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp @@ -257,9 +257,8 @@ int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { return 0; } -instrprof_error -InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input, - uint64_t Weight) { +instrprof_error InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input, + uint64_t Weight) { this->sortByTargetValues(); Input.sortByTargetValues(); auto I = ValueData.begin(); @@ -270,14 +269,8 @@ InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input, while (I != IE && I->Value < J->Value) ++I; if (I != IE && I->Value == J->Value) { - uint64_t JCount = J->Count; bool Overflowed; - if (Weight > 1) { - JCount = SaturatingMultiply(JCount, Weight, &Overflowed); - if (Overflowed) - Result = instrprof_error::counter_overflow; - } - I->Count = SaturatingAdd(I->Count, JCount, &Overflowed); + I->Count = SaturatingMultiplyAdd(J->Count, Weight, I->Count, &Overflowed); if (Overflowed) Result = instrprof_error::counter_overflow; ++I; @@ -288,6 +281,17 @@ InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input, return Result; } +instrprof_error InstrProfValueSiteRecord::scale(uint64_t Weight) { + instrprof_error Result = instrprof_error::success; + for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) { + bool Overflowed; + I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + } + return Result; +} + // Merge Value Profile data from Src record to this 
record for ValueKind. // Scale merged value counts by \p Weight. instrprof_error InstrProfRecord::mergeValueProfData(uint32_t ValueKind, @@ -303,8 +307,7 @@ instrprof_error InstrProfRecord::mergeValueProfData(uint32_t ValueKind, Src.getValueSitesForKind(ValueKind); instrprof_error Result = instrprof_error::success; for (uint32_t I = 0; I < ThisNumValueSites; I++) - MergeResult(Result, - ThisSiteRecords[I].mergeValueData(OtherSiteRecords[I], Weight)); + MergeResult(Result, ThisSiteRecords[I].merge(OtherSiteRecords[I], Weight)); return Result; } @@ -319,13 +322,8 @@ instrprof_error InstrProfRecord::merge(InstrProfRecord &Other, for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) { bool Overflowed; - uint64_t OtherCount = Other.Counts[I]; - if (Weight > 1) { - OtherCount = SaturatingMultiply(OtherCount, Weight, &Overflowed); - if (Overflowed) - Result = instrprof_error::counter_overflow; - } - Counts[I] = SaturatingAdd(Counts[I], OtherCount, &Overflowed); + Counts[I] = + SaturatingMultiplyAdd(Other.Counts[I], Weight, Counts[I], &Overflowed); if (Overflowed) Result = instrprof_error::counter_overflow; } @@ -336,6 +334,32 @@ instrprof_error InstrProfRecord::merge(InstrProfRecord &Other, return Result; } +instrprof_error InstrProfRecord::scaleValueProfData(uint32_t ValueKind, + uint64_t Weight) { + uint32_t ThisNumValueSites = getNumValueSites(ValueKind); + std::vector<InstrProfValueSiteRecord> &ThisSiteRecords = + getValueSitesForKind(ValueKind); + instrprof_error Result = instrprof_error::success; + for (uint32_t I = 0; I < ThisNumValueSites; I++) + MergeResult(Result, ThisSiteRecords[I].scale(Weight)); + return Result; +} + +instrprof_error InstrProfRecord::scale(uint64_t Weight) { + instrprof_error Result = instrprof_error::success; + for (auto &Count : this->Counts) { + bool Overflowed; + Count = SaturatingMultiply(Count, Weight, &Overflowed); + if (Overflowed && Result == instrprof_error::success) { + Result = instrprof_error::counter_overflow; + } + } + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + MergeResult(Result, scaleValueProfData(Kind, Weight)); + + return Result; +} + // Map indirect call target name hash to name string. uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind, ValueMapType *ValueMap) { diff --git a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp index 9bb03e1..f522724 100644 --- a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -104,27 +104,21 @@ std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I, ProfileDataMap.insert(std::make_pair(I.Hash, InstrProfRecord())); InstrProfRecord &Dest = Where->second; - instrprof_error Result; + instrprof_error Result = instrprof_error::success; if (NewFunc) { // We've never seen a function with this name and hash, add it. Dest = std::move(I); // Fix up the name to avoid dangling reference. Dest.Name = FunctionData.find(Dest.Name)->getKey(); - Result = instrprof_error::success; - if (Weight > 1) { - for (auto &Count : Dest.Counts) { - bool Overflowed; - Count = SaturatingMultiply(Count, Weight, &Overflowed); - if (Overflowed && Result == instrprof_error::success) { - Result = instrprof_error::counter_overflow; - } - } - } + if (Weight > 1) + Result = Dest.scale(Weight); } else { // We're updating a function we've seen before. Result = Dest.merge(I, Weight); } + Dest.sortValueData(); + // We keep track of the max function count as we go for simplicity. 
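The InstrProf hunks above collapse the separate SaturatingMultiply/SaturatingAdd steps into a single SaturatingMultiplyAdd call. A freestanding uint64_t equivalent, assuming the GCC/Clang overflow builtins (a sketch of the semantics, not LLVM's MathExtras implementation):

    #include <cstdint>

    // Computes A*B + C, clamping to UINT64_MAX on overflow and reporting it,
    // as used by the weighted merge paths above.
    uint64_t saturatingMultiplyAdd(uint64_t A, uint64_t B, uint64_t C,
                                   bool *Overflowed) {
      uint64_t Mul, Res;
      bool Ovf = __builtin_mul_overflow(A, B, &Mul);
      Ovf |= __builtin_add_overflow(Mul, C, &Res);
      if (Ovf)
        Res = UINT64_MAX;
      *Overflowed = Ovf;
      return Res;
    }

Saturating instead of wrapping keeps a weighted merge from silently corrupting counters, while the reported flag still lets callers surface instrprof_error::counter_overflow.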
// Update this statistic no matter the result of the merge. if (Dest.Counts[0] > MaxFunctionCount) diff --git a/contrib/llvm/lib/Support/Debug.cpp b/contrib/llvm/lib/Support/Debug.cpp index 47751fc..323d532 100644 --- a/contrib/llvm/lib/Support/Debug.cpp +++ b/contrib/llvm/lib/Support/Debug.cpp @@ -95,7 +95,10 @@ struct DebugOnlyOpt { if (Val.empty()) return; DebugFlag = true; - CurrentDebugType->push_back(Val); + SmallVector<StringRef,8> dbgTypes; + StringRef(Val).split(dbgTypes, ',', -1, false); + for (auto dbgType : dbgTypes) + CurrentDebugType->push_back(dbgType); } }; @@ -104,10 +107,9 @@ struct DebugOnlyOpt { static DebugOnlyOpt DebugOnlyOptLoc; static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> > -DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"), +DebugOnly("debug-only", cl::desc("Enable a specific type of debug output (comma separated list of types)"), cl::Hidden, cl::ZeroOrMore, cl::value_desc("debug string"), cl::location(DebugOnlyOptLoc), cl::ValueRequired); - // Signal handlers - dump debug output on termination. static void debug_user_sig_handler(void *Cookie) { // This is a bit sneaky. Since this is under #ifndef NDEBUG, we diff --git a/contrib/llvm/lib/Support/IntEqClasses.cpp b/contrib/llvm/lib/Support/IntEqClasses.cpp index 1134495..ff21357 100644 --- a/contrib/llvm/lib/Support/IntEqClasses.cpp +++ b/contrib/llvm/lib/Support/IntEqClasses.cpp @@ -29,7 +29,7 @@ void IntEqClasses::grow(unsigned N) { EC.push_back(EC.size()); } -void IntEqClasses::join(unsigned a, unsigned b) { +unsigned IntEqClasses::join(unsigned a, unsigned b) { assert(NumClasses == 0 && "join() called after compress()."); unsigned eca = EC[a]; unsigned ecb = EC[b]; @@ -41,6 +41,8 @@ void IntEqClasses::join(unsigned a, unsigned b) { EC[b] = eca, b = ecb, ecb = EC[b]; else EC[a] = ecb, a = eca, eca = EC[a]; + + return eca; } unsigned IntEqClasses::findLeader(unsigned a) const { diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp index 3bb1116..0e5d3ac 100644 --- a/contrib/llvm/lib/Support/Triple.cpp +++ b/contrib/llvm/lib/Support/Triple.cpp @@ -1154,8 +1154,6 @@ Triple Triple::get32BitArchVariant() const { Triple T(*this); switch (getArch()) { case Triple::UnknownArch: - case Triple::aarch64: - case Triple::aarch64_be: case Triple::amdgcn: case Triple::avr: case Triple::bpfel: @@ -1191,17 +1189,19 @@ Triple Triple::get32BitArchVariant() const { // Already 32-bit. 
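The Debug.cpp hunk above lets -debug-only take a comma-separated list, e.g. -debug-only=isel,regalloc (the pass names are only examples). The splitting matches this standalone equivalent of StringRef::split with KeepEmpty=false:

    #include <string>
    #include <vector>

    // splitDebugTypes("isel,regalloc") -> {"isel", "regalloc"}; empty pieces
    // between consecutive commas are dropped, as in the hunk above.
    std::vector<std::string> splitDebugTypes(const std::string &Val) {
      std::vector<std::string> Types;
      size_t Begin = 0;
      while (Begin <= Val.size()) {
        size_t End = Val.find(',', Begin);
        if (End == std::string::npos)
          End = Val.size();
        if (End > Begin) // KeepEmpty == false
          Types.push_back(Val.substr(Begin, End - Begin));
        Begin = End + 1;
      }
      return Types;
    }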
break; - case Triple::le64: T.setArch(Triple::le32); break; - case Triple::mips64: T.setArch(Triple::mips); break; - case Triple::mips64el: T.setArch(Triple::mipsel); break; - case Triple::nvptx64: T.setArch(Triple::nvptx); break; - case Triple::ppc64: T.setArch(Triple::ppc); break; - case Triple::sparcv9: T.setArch(Triple::sparc); break; - case Triple::x86_64: T.setArch(Triple::x86); break; - case Triple::amdil64: T.setArch(Triple::amdil); break; - case Triple::hsail64: T.setArch(Triple::hsail); break; - case Triple::spir64: T.setArch(Triple::spir); break; - case Triple::wasm64: T.setArch(Triple::wasm32); break; + case Triple::aarch64: T.setArch(Triple::arm); break; + case Triple::aarch64_be: T.setArch(Triple::armeb); break; + case Triple::le64: T.setArch(Triple::le32); break; + case Triple::mips64: T.setArch(Triple::mips); break; + case Triple::mips64el: T.setArch(Triple::mipsel); break; + case Triple::nvptx64: T.setArch(Triple::nvptx); break; + case Triple::ppc64: T.setArch(Triple::ppc); break; + case Triple::sparcv9: T.setArch(Triple::sparc); break; + case Triple::x86_64: T.setArch(Triple::x86); break; + case Triple::amdil64: T.setArch(Triple::amdil); break; + case Triple::hsail64: T.setArch(Triple::hsail); break; + case Triple::spir64: T.setArch(Triple::spir); break; + case Triple::wasm64: T.setArch(Triple::wasm32); break; } return T; } @@ -1210,16 +1210,12 @@ Triple Triple::get64BitArchVariant() const { Triple T(*this); switch (getArch()) { case Triple::UnknownArch: - case Triple::arm: - case Triple::armeb: case Triple::avr: case Triple::hexagon: case Triple::kalimba: case Triple::msp430: case Triple::r600: case Triple::tce: - case Triple::thumb: - case Triple::thumbeb: case Triple::xcore: case Triple::sparcel: case Triple::shave: @@ -1247,17 +1243,21 @@ Triple Triple::get64BitArchVariant() const { // Already 64-bit. 
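With this Triple.cpp hunk (and its 64-bit counterpart just below), the arch-variant queries map ARM and AArch64 onto each other instead of giving up. Usage against the modified API:

    #include "llvm/ADT/Triple.h"

    void example() {
      llvm::Triple T("aarch64-unknown-linux-gnu");
      llvm::Triple T32 = T.get32BitArchVariant(); // arch is now Triple::arm
      llvm::Triple U("thumbv7-none-linux-gnueabi");
      llvm::Triple U64 = U.get64BitArchVariant(); // arch is now Triple::aarch64
      (void)T32;
      (void)U64;
    }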
break; - case Triple::le32: T.setArch(Triple::le64); break; - case Triple::mips: T.setArch(Triple::mips64); break; - case Triple::mipsel: T.setArch(Triple::mips64el); break; - case Triple::nvptx: T.setArch(Triple::nvptx64); break; - case Triple::ppc: T.setArch(Triple::ppc64); break; - case Triple::sparc: T.setArch(Triple::sparcv9); break; - case Triple::x86: T.setArch(Triple::x86_64); break; - case Triple::amdil: T.setArch(Triple::amdil64); break; - case Triple::hsail: T.setArch(Triple::hsail64); break; - case Triple::spir: T.setArch(Triple::spir64); break; - case Triple::wasm32: T.setArch(Triple::wasm64); break; + case Triple::arm: T.setArch(Triple::aarch64); break; + case Triple::armeb: T.setArch(Triple::aarch64_be); break; + case Triple::le32: T.setArch(Triple::le64); break; + case Triple::mips: T.setArch(Triple::mips64); break; + case Triple::mipsel: T.setArch(Triple::mips64el); break; + case Triple::nvptx: T.setArch(Triple::nvptx64); break; + case Triple::ppc: T.setArch(Triple::ppc64); break; + case Triple::sparc: T.setArch(Triple::sparcv9); break; + case Triple::x86: T.setArch(Triple::x86_64); break; + case Triple::amdil: T.setArch(Triple::amdil64); break; + case Triple::hsail: T.setArch(Triple::hsail64); break; + case Triple::spir: T.setArch(Triple::spir64); break; + case Triple::thumb: T.setArch(Triple::aarch64); break; + case Triple::thumbeb: T.setArch(Triple::aarch64_be); break; + case Triple::wasm32: T.setArch(Triple::wasm64); break; } return T; } diff --git a/contrib/llvm/lib/Support/Windows/Path.inc b/contrib/llvm/lib/Support/Windows/Path.inc index 4e48412..5ef77b1 100644 --- a/contrib/llvm/lib/Support/Windows/Path.inc +++ b/contrib/llvm/lib/Support/Windows/Path.inc @@ -38,6 +38,7 @@ typedef int errno_t; #ifdef _MSC_VER # pragma comment(lib, "advapi32.lib") // This provides CryptAcquireContextW. +# pragma comment(lib, "ole32.lib") // This provides CoTaskMemFree #endif using namespace llvm; diff --git a/contrib/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc index d109a66..f40ca72 100644 --- a/contrib/llvm/lib/Support/Windows/Signals.inc +++ b/contrib/llvm/lib/Support/Windows/Signals.inc @@ -405,7 +405,10 @@ static void RegisterHandler() { // If we cannot load up the APIs (which would be unexpected as they should // exist on every version of Windows we support), we will bail out since // there would be nothing to report. - assert(load64BitDebugHelp() && "These APIs should always be available"); + if (!load64BitDebugHelp()) { + assert(false && "These APIs should always be available"); + return; + } if (RegisteredUnhandledExceptionFilter) { EnterCriticalSection(&CriticalSection); diff --git a/contrib/llvm/lib/Support/Windows/WindowsSupport.h b/contrib/llvm/lib/Support/Windows/WindowsSupport.h index c65e314..60490f2 100644 --- a/contrib/llvm/lib/Support/Windows/WindowsSupport.h +++ b/contrib/llvm/lib/Support/Windows/WindowsSupport.h @@ -47,20 +47,27 @@ #include <string> #include <vector> -#if !defined(__CYGWIN__) && !defined(__MINGW32__) -#include <VersionHelpers.h> -#else -// Cygwin does not have the IsWindows8OrGreater() API. -// Some version of mingw does not have the API either. -inline bool IsWindows8OrGreater() { - OSVERSIONINFO osvi = {}; +/// Determines if the program is running on Windows 8 or newer. This +/// reimplements one of the helpers in the Windows 8.1 SDK, which are intended +/// to supersede raw calls to GetVersionEx. Old SDKs, Cygwin, and MinGW don't +/// yet have VersionHelpers.h, so we have our own helper.
+inline bool RunningWindows8OrGreater() { + // Windows 8 is version 6.2, service pack 0. + OSVERSIONINFOEXW osvi = {}; osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); - if (!::GetVersionEx(&osvi)) - return false; - return (osvi.dwMajorVersion > 6 || - (osvi.dwMajorVersion == 6 && osvi.dwMinorVersion >= 2)); + osvi.dwMajorVersion = 6; + osvi.dwMinorVersion = 2; + osvi.wServicePackMajor = 0; + + DWORDLONG Mask = 0; + Mask = VerSetConditionMask(Mask, VER_MAJORVERSION, VER_GREATER_EQUAL); + Mask = VerSetConditionMask(Mask, VER_MINORVERSION, VER_GREATER_EQUAL); + Mask = VerSetConditionMask(Mask, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL); + + return VerifyVersionInfoW(&osvi, VER_MAJORVERSION | VER_MINORVERSION | + VER_SERVICEPACKMAJOR, + Mask) != FALSE; } -#endif // __CYGWIN__ inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) { if (!ErrMsg) diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp index 57162dc..15813fd 100644 --- a/contrib/llvm/lib/Support/raw_ostream.cpp +++ b/contrib/llvm/lib/Support/raw_ostream.cpp @@ -577,7 +577,7 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { // Writing a large size of output to Windows console returns ENOMEM. It seems // that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and // the latter has a size limit (66000 bytes or less, depending on heap usage). - bool ShouldWriteInChunks = !!::_isatty(FD) && !IsWindows8OrGreater(); + bool ShouldWriteInChunks = !!::_isatty(FD) && !RunningWindows8OrGreater(); #endif do { diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 3ef3c8b..f398117 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2487,15 +2487,36 @@ static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, return true; } -/// Return true when there is potentially a faster code sequence -/// for an instruction chain ending in \p Root. All potential patterns are -/// listed -/// in the \p Pattern vector. Pattern should be sorted in priority order since -/// the pattern evaluator stops checking as soon as it finds a faster sequence. +// TODO: There are many more machine instruction opcodes to match: +// 1. Other data types (integer, vectors) +// 2. Other math / logic operations (xor, or) +// 3. Other forms of the same operation (intrinsics and other variants) +bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { + switch (Inst.getOpcode()) { + case AArch64::FADDDrr: + case AArch64::FADDSrr: + case AArch64::FADDv2f32: + case AArch64::FADDv2f64: + case AArch64::FADDv4f32: + case AArch64::FMULDrr: + case AArch64::FMULSrr: + case AArch64::FMULX32: + case AArch64::FMULX64: + case AArch64::FMULXv2f32: + case AArch64::FMULXv2f64: + case AArch64::FMULXv4f32: + case AArch64::FMULv2f32: + case AArch64::FMULv2f64: + case AArch64::FMULv4f32: + return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; + default: + return false; + } +} -bool AArch64InstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &Patterns) const { +/// Find instructions that can be turned into madd. 
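getMaddPatterns, which follows, only spots the fusion opportunity; in source terms the shape it looks for is simply a multiply feeding an add:

    // AArch64 can lower this to a single MADD instruction rather than a
    // separate MUL and ADD, which is what the patterns below detect.
    long madd(long a, long b, long c) { return a * b + c; }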
+static bool getMaddPatterns(MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns) { unsigned Opc = Root.getOpcode(); MachineBasicBlock &MBB = *Root.getParent(); bool Found = false; @@ -2600,6 +2621,20 @@ bool AArch64InstrInfo::getMachineCombinerPatterns( return Found; } +/// Return true when there is potentially a faster code sequence for an +/// instruction chain ending in \p Root. All potential patterns are listed in +/// the \p Pattern vector. Pattern should be sorted in priority order since the +/// pattern evaluator stops checking as soon as it finds a faster sequence. + +bool AArch64InstrInfo::getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns) const { + if (getMaddPatterns(Root, Patterns)) + return true; + + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); +} + /// genMadd - Generate madd instruction and combine mul and add. /// Example: /// MUL I=A,B,0 @@ -2713,8 +2748,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence( unsigned Opc; switch (Pattern) { default: - // signal error. - break; + // Reassociate instructions. + TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, + DelInstrs, InstrIdxForVirtReg); + return; case MachineCombinerPattern::MULADDW_OP1: case MachineCombinerPattern::MULADDX_OP1: // MUL I=A,B,0 diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h index ae02822..b5bb446 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -169,7 +169,9 @@ public: bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns) const override; - + /// Return true when Inst is associative and commutative so that it can be + /// reassociated. 
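The isAssociativeAndCommutative override declared next feeds TargetInstrInfo's generic reassociation and is gated on UnsafeFPMath because FP add and mul are not associative. In scalar terms, the rewrite trades a serial chain for a balanced tree:

    // Three dependent fadds: each one waits on the previous result.
    float serial(float a, float b, float c, float d) {
      return ((a + b) + c) + d;
    }

    // Two independent fadds plus one more: a shorter critical path. For
    // floating point this is only legal under unsafe-fp-math.
    float balanced(float a, float b, float c, float d) {
      return (a + b) + (c + d);
    }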
+ bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; /// When getMachineCombinerPatterns() finds patterns, this function generates /// the instructions that could replace the original code sequence void genAlternativeCodeSequence( diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h index 8c3cb56..5d00e1c 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -20,8 +20,10 @@ class AMDGPUInstrPrinter; class AMDGPUSubtarget; class AMDGPUTargetMachine; class FunctionPass; +class MachineSchedContext; class MCAsmInfo; class raw_ostream; +class ScheduleDAGInstrs; class Target; class TargetMachine; @@ -49,6 +51,8 @@ FunctionPass *createSIFixSGPRLiveRangesPass(); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); FunctionPass *createSIInsertWaits(TargetMachine &tm); +ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C); + ModulePass *createAMDGPUAnnotateKernelFeaturesPass(); void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); extern char &AMDGPUAnnotateKernelFeaturesID; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 9c37902..1239dfb2 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -91,6 +91,25 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) : AsmPrinter(TM, std::move(Streamer)) {} +void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) + return; + + // Need to construct an MCSubtargetInfo here in case we have no functions + // in the module. + std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( + TM.getTargetTriple().str(), TM.getTargetCPU(), + TM.getTargetFeatureString())); + + AMDGPUTargetStreamer *TS = + static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer()); + + TS->EmitDirectiveHSACodeObjectVersion(1, 0); + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits()); + TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, + "AMD", "AMDGPU"); +} + void AMDGPUAsmPrinter::EmitFunctionBodyStart() { const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); SIProgramInfo KernelInfo; @@ -148,11 +167,15 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { TS->EmitAMDGPUHsaProgramScopeGlobal(GV->getName()); } + MCSymbolELF *GVSym = cast<MCSymbolELF>(getSymbol(GV)); const DataLayout &DL = getDataLayout(); + + // Emit the size + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); + OutStreamer->emitELFSize(GVSym, MCConstantExpr::create(Size, OutContext)); OutStreamer->PushSection(); OutStreamer->SwitchSection( getObjFileLowering().SectionForGlobal(GV, *Mang, TM)); - MCSymbol *GVSym = getSymbol(GV); const Constant *C = GV->getInitializer(); OutStreamer->EmitLabel(GVSym); EmitGlobalConstant(DL, C); @@ -178,13 +201,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (!STM.isAmdHsaOS()) { EmitProgramInfoSI(MF, KernelInfo); } - // Emit directives - AMDGPUTargetStreamer *TS = - static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer()); - TS->EmitDirectiveHSACodeObjectVersion(1, 0); - AMDGPU::IsaVersion ISA = STM.getIsaVersion(); - TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, - "AMD", "AMDGPU"); } else { EmitProgramInfoR600(MF); } @@ 
-417,16 +433,24 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, } } - if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) { - MaxSGPR += 2; + unsigned ExtraSGPRs = 0; - if (FlatUsed) - MaxSGPR += 2; + if (VCCUsed) + ExtraSGPRs = 2; + if (STM.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) { + if (FlatUsed) + ExtraSGPRs = 4; + } else { if (STM.isXNACKEnabled()) - MaxSGPR += 2; + ExtraSGPRs = 4; + + if (FlatUsed) + ExtraSGPRs = 6; } + MaxSGPR += ExtraSGPRs; + // We found the maximum register index. They start at 0, so add one to get the // number of registers. ProgInfo.NumVGPR = MaxVGPR + 1; @@ -563,7 +587,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4); OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4); OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); - OutStreamer->EmitIntValue(MFI->PSInputAddr, 4); + OutStreamer->EmitIntValue(MFI->PSInputEna, 4); + OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4); + OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4); } } diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 817cbfc..99d4091 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -103,6 +103,8 @@ public: void EmitGlobalVariable(const GlobalVariable *GV) override; + void EmitStartOfAsmFile(Module &M) override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index 6ffa7a0..b0db261 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -20,28 +20,83 @@ def CC_SI : CallingConv<[ CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, - SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21 + SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, + SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31, + SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39 ]>>>, CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow< - [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ], - [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ] + [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14, + SGPR16, SGPR18, SGPR20, SGPR22, SGPR24, SGPR26, SGPR28, SGPR30, + SGPR32, SGPR34, SGPR36, SGPR38 ], + [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15, + SGPR17, SGPR19, SGPR21, SGPR23, SGPR25, SGPR27, SGPR29, SGPR31, + SGPR33, SGPR35, SGPR37, SGPR39 ] >>>, + // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs. 
CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, - VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31 + VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31, + VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39, + VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47, + VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55, + VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63, + VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71, + VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79, + VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87, + VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95, + VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103, + VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111, + VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119, + VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127, + VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135 ]>>>, CCIfByVal<CCIfType<[i64] , CCAssignToRegWithShadow< - [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ], - [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ] + [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14, + SGPR16, SGPR18, SGPR20, SGPR22, SGPR24, SGPR26, SGPR28, SGPR30, + SGPR32, SGPR34, SGPR36, SGPR38 ], + [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15, + SGPR17, SGPR19, SGPR21, SGPR23, SGPR25, SGPR27, SGPR29, SGPR31, + SGPR33, SGPR35, SGPR37, SGPR39 ] >>> ]>; +def RetCC_SI : CallingConv<[ + CCIfType<[i32] , CCAssignToReg<[ + SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, + SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, + SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, + SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31, + SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39 + ]>>, + + // 32*4 + 4 is the minimum for a fetch shader with 32 outputs. 
+ CCIfType<[f32] , CCAssignToReg<[ + VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, + VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, + VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, + VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31, + VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39, + VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47, + VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55, + VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63, + VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71, + VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79, + VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87, + VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95, + VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103, + VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111, + VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119, + VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127, + VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135 + ]>> +]>; + // Calling convention for R600 def CC_R600 : CallingConv<[ CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[ diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 222f631..1a59a46 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -282,12 +282,19 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::SMAX, MVT::i32, Legal); setOperationAction(ISD::UMAX, MVT::i32, Legal); - if (!Subtarget->hasFFBH()) + if (Subtarget->hasFFBH()) + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); + else setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); if (!Subtarget->hasFFBL()) setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); + + setOperationAction(ISD::CTLZ, MVT::i64, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); + static const MVT::SimpleValueType VectorIntTypes[] = { MVT::v2i32, MVT::v4i32 }; @@ -565,6 +572,12 @@ void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State, State.AnalyzeFormalArguments(Ins, CC_AMDGPU); } +void AMDGPUTargetLowering::AnalyzeReturn(CCState &State, + const SmallVectorImpl<ISD::OutputArg> &Outs) const { + + State.AnalyzeReturn(Outs, RetCC_SI); +} + SDValue AMDGPUTargetLowering::LowerReturn( SDValue Chain, CallingConv::ID CallConv, @@ -633,6 +646,9 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + return LowerCTLZ(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); } return Op; @@ -2159,6 +2175,145 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); } +SDValue AMDGPUTargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Src = Op.getOperand(0); + bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF; + + if (ZeroUndef && Src.getValueType() == MVT::i32) + return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Src); + + SDValue 
Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src); + + const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); + const SDValue One = DAG.getConstant(1, SL, MVT::i32); + + SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero); + SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One); + + EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), MVT::i32); + + SDValue Hi0 = DAG.getSetCC(SL, SetCCVT, Hi, Zero, ISD::SETEQ); + + SDValue CtlzLo = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i32, Lo); + SDValue CtlzHi = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i32, Hi); + + const SDValue Bits32 = DAG.getConstant(32, SL, MVT::i32); + SDValue Add = DAG.getNode(ISD::ADD, SL, MVT::i32, CtlzLo, Bits32); + + // ctlz(x) = hi_32(x) == 0 ? ctlz(lo_32(x)) + 32 : ctlz(hi_32(x)) + SDValue NewCtlz = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0, Add, CtlzHi); + + if (!ZeroUndef) { + // Test if the full 64-bit input is zero. + + // FIXME: DAG combines turn what should be an s_and_b64 into a v_or_b32, + // which we probably don't want. + SDValue Lo0 = DAG.getSetCC(SL, SetCCVT, Lo, Zero, ISD::SETEQ); + SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0, Hi0); + + // TODO: If i64 setcc is half rate, it can result in 1 fewer instruction + // with the same cycles, otherwise it is slower. + // SDValue SrcIsZero = DAG.getSetCC(SL, SetCCVT, Src, + // DAG.getConstant(0, SL, MVT::i64), ISD::SETEQ); + + const SDValue Bits32 = DAG.getConstant(64, SL, MVT::i32); + + // The instruction returns -1 for 0 input, but the defined intrinsic + // behavior is to return the number of bits. + NewCtlz = DAG.getNode(ISD::SELECT, SL, MVT::i32, + SrcIsZero, Bits32, NewCtlz); + } + + return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewCtlz); +} + +SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, + bool Signed) const { + // Unsigned + // cul2f(ulong u) + //{ + // uint lz = clz(u); + // uint e = (u != 0) ? 127U + 63U - lz : 0; + // u = (u << lz) & 0x7fffffffffffffffUL; + // ulong t = u & 0xffffffffffUL; + // uint v = (e << 23) | (uint)(u >> 40); + // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U); + // return as_float(v + r); + //} + // Signed + // cl2f(long l) + //{ + // long s = l >> 63; + // float r = cul2f((l + s) ^ s); + // return s ? 
-r : r; + //} + + SDLoc SL(Op); + SDValue Src = Op.getOperand(0); + SDValue L = Src; + + SDValue S; + if (Signed) { + const SDValue SignBit = DAG.getConstant(63, SL, MVT::i64); + S = DAG.getNode(ISD::SRA, SL, MVT::i64, L, SignBit); + + SDValue LPlusS = DAG.getNode(ISD::ADD, SL, MVT::i64, L, S); + L = DAG.getNode(ISD::XOR, SL, MVT::i64, LPlusS, S); + } + + EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), MVT::f32); + + + SDValue ZeroI32 = DAG.getConstant(0, SL, MVT::i32); + SDValue ZeroI64 = DAG.getConstant(0, SL, MVT::i64); + SDValue LZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i64, L); + LZ = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LZ); + + SDValue K = DAG.getConstant(127U + 63U, SL, MVT::i32); + SDValue E = DAG.getSelect(SL, MVT::i32, + DAG.getSetCC(SL, SetCCVT, L, ZeroI64, ISD::SETNE), + DAG.getNode(ISD::SUB, SL, MVT::i32, K, LZ), + ZeroI32); + + SDValue U = DAG.getNode(ISD::AND, SL, MVT::i64, + DAG.getNode(ISD::SHL, SL, MVT::i64, L, LZ), + DAG.getConstant((-1ULL) >> 1, SL, MVT::i64)); + + SDValue T = DAG.getNode(ISD::AND, SL, MVT::i64, U, + DAG.getConstant(0xffffffffffULL, SL, MVT::i64)); + + SDValue UShl = DAG.getNode(ISD::SRL, SL, MVT::i64, + U, DAG.getConstant(40, SL, MVT::i64)); + + SDValue V = DAG.getNode(ISD::OR, SL, MVT::i32, + DAG.getNode(ISD::SHL, SL, MVT::i32, E, DAG.getConstant(23, SL, MVT::i32)), + DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, UShl)); + + SDValue C = DAG.getConstant(0x8000000000ULL, SL, MVT::i64); + SDValue RCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETUGT); + SDValue TCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETEQ); + + SDValue One = DAG.getConstant(1, SL, MVT::i32); + + SDValue VTrunc1 = DAG.getNode(ISD::AND, SL, MVT::i32, V, One); + + SDValue R = DAG.getSelect(SL, MVT::i32, + RCmp, + One, + DAG.getSelect(SL, MVT::i32, TCmp, VTrunc1, ZeroI32)); + R = DAG.getNode(ISD::ADD, SL, MVT::i32, V, R); + R = DAG.getNode(ISD::BITCAST, SL, MVT::f32, R); + + if (!Signed) + return R; + + SDValue RNeg = DAG.getNode(ISD::FNEG, SL, MVT::f32, R); + return DAG.getSelect(SL, MVT::f32, DAG.getSExtOrTrunc(S, SL, SetCCVT), RNeg, R); +} + SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const { SDLoc SL(Op); @@ -2184,35 +2339,29 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { - SDValue S0 = Op.getOperand(0); - if (S0.getValueType() != MVT::i64) - return SDValue(); + assert(Op.getOperand(0).getValueType() == MVT::i64 && + "operation should be legal"); EVT DestVT = Op.getValueType(); if (DestVT == MVT::f64) return LowerINT_TO_FP64(Op, DAG, false); - assert(DestVT == MVT::f32); - - SDLoc DL(Op); + if (DestVT == MVT::f32) + return LowerINT_TO_FP32(Op, DAG, false); - // f32 uint_to_fp i64 - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, - DAG.getConstant(0, DL, MVT::i32)); - SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, - DAG.getConstant(1, DL, MVT::i32)); - SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi); - // TODO: Should this propagate fast-math-flags? 
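The cul2f() pseudocode in the comment above transcribes directly into host code for sanity-checking the rounding behavior (zero is handled up front because clz(0) and a shift by 64 are undefined in C++):

    #include <cstdint>
    #include <cstring>

    float cul2f(uint64_t u) {
      if (u == 0)
        return 0.0f;                                  // the e = 0 case
      unsigned lz = __builtin_clzll(u);
      uint32_t e = 127u + 63u - lz;                   // biased exponent
      uint64_t m = (u << lz) & 0x7fffffffffffffffULL; // drop the implicit bit
      uint64_t t = m & 0xffffffffffULL;               // 40-bit rounding tail
      uint32_t v = (e << 23) | (uint32_t)(m >> 40);
      uint32_t r = t > 0x8000000000ULL
                       ? 1u
                       : (t == 0x8000000000ULL ? (v & 1u) : 0u);
      v += r; // round to nearest even; a mantissa carry bumps the exponent
      float f;
      std::memcpy(&f, &v, sizeof f);
      return f;
    }

The low 40 bits that do not fit in the 23-bit mantissa drive the round-to-nearest-even adjustment, matching the r computation in the comment.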
- FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi, - DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32 - return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi); + return SDValue(); } SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { - SDValue Src = Op.getOperand(0); - if (Src.getValueType() == MVT::i64 && Op.getValueType() == MVT::f64) + assert(Op.getOperand(0).getValueType() == MVT::i64 && + "operation should be legal"); + + EVT DestVT = Op.getValueType(); + if (DestVT == MVT::f32) + return LowerINT_TO_FP32(Op, DAG, true); + + if (DestVT == MVT::f64) return LowerINT_TO_FP64(Op, DAG, true); return SDValue(); @@ -2447,6 +2596,97 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N, return DAG.getSExtOrTrunc(Mul, DL, VT); } +static bool isNegativeOne(SDValue Val) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) + return C->isAllOnesValue(); + return false; +} + +static bool isCtlzOpc(unsigned Opc) { + return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF; +} + +// Get FFBH node if the incoming op may have been type legalized from a smaller +// type VT. +// Need to match pre-legalized type because the generic legalization inserts the +// add/sub between the select and compare. +static SDValue getFFBH_U32(const TargetLowering &TLI, + SelectionDAG &DAG, SDLoc SL, SDValue Op) { + EVT VT = Op.getValueType(); + EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + if (LegalVT != MVT::i32) + return SDValue(); + + if (VT != MVT::i32) + Op = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Op); + + SDValue FFBH = DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Op); + if (VT != MVT::i32) + FFBH = DAG.getNode(ISD::TRUNCATE, SL, VT, FFBH); + + return FFBH; +} + +// The native instructions return -1 on 0 input. Optimize out a select that +// produces -1 on 0. +// +// TODO: If zero is not undef, we could also do this if the output is compared +// against the bitwidth. +// +// TODO: Should probably combine against FFBH_U32 instead of ctlz directly. 
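performCtlzCombine, defined next, exploits the fact that the hardware count-leading-zeros already yields -1 (all ones) for a zero input, making the guarding select redundant. The scalar picture:

    #include <cstdint>

    // The generic node's contract: undefined on zero input.
    uint32_t ctlz_zero_undef(uint32_t x) { return (uint32_t)__builtin_clz(x); }

    // The AMDGPU instruction's semantics: -1 on zero input.
    uint32_t ffbh_u32(uint32_t x) {
      return x == 0 ? UINT32_MAX : ctlz_zero_undef(x);
    }

    // Pattern before the combine: select (setcc x, 0, eq), -1,
    // (ctlz_zero_undef x). It matches ffbh_u32 exactly, so the select and
    // compare fold away into the single instruction.
    uint32_t guarded(uint32_t x) {
      return x == 0 ? UINT32_MAX : ctlz_zero_undef(x);
    }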
+SDValue AMDGPUTargetLowering::performCtlzCombine(SDLoc SL, + SDValue Cond, + SDValue LHS, + SDValue RHS, + DAGCombinerInfo &DCI) const { + ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1)); + if (!CmpRhs || !CmpRhs->isNullValue()) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + SDValue CmpLHS = Cond.getOperand(0); + + // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x + if (CCOpcode == ISD::SETEQ && + isCtlzOpc(RHS.getOpcode()) && + RHS.getOperand(0) == CmpLHS && + isNegativeOne(LHS)) { + return getFFBH_U32(*this, DAG, SL, CmpLHS); + } + + // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x + if (CCOpcode == ISD::SETNE && + isCtlzOpc(LHS.getOpcode()) && + LHS.getOperand(0) == CmpLHS && + isNegativeOne(RHS)) { + return getFFBH_U32(*this, DAG, SL, CmpLHS); + } + + return SDValue(); +} + +SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SDValue Cond = N->getOperand(0); + if (Cond.getOpcode() != ISD::SETCC) + return SDValue(); + + EVT VT = N->getValueType(0); + SDValue LHS = Cond.getOperand(0); + SDValue RHS = Cond.getOperand(1); + SDValue CC = Cond.getOperand(2); + + SDValue True = N->getOperand(1); + SDValue False = N->getOperand(2); + + if (VT == MVT::f32 && Cond.hasOneUse()) + return CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI); + + // There's no reason to not do this if the condition has other uses. + return performCtlzCombine(SDLoc(N), Cond, True, False, DCI); +} + SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -2471,23 +2711,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, simplifyI24(N1, DCI); return SDValue(); } - case ISD::SELECT: { - SDValue Cond = N->getOperand(0); - if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) { - EVT VT = N->getValueType(0); - SDValue LHS = Cond.getOperand(0); - SDValue RHS = Cond.getOperand(1); - SDValue CC = Cond.getOperand(2); - - SDValue True = N->getOperand(1); - SDValue False = N->getOperand(2); - - if (VT == MVT::f32) - return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI); - } - - break; - } + case ISD::SELECT: + return performSelectCombine(N, DCI); case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { assert(!N->getValueType(0).isVector() && @@ -2699,6 +2924,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(BFI) NODE_NAME_CASE(BFM) + NODE_NAME_CASE(FFBH_U32) NODE_NAME_CASE(MUL_U24) NODE_NAME_CASE(MUL_I24) NODE_NAME_CASE(MAD_U24) diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 7314cc0..3792541 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -54,6 +54,9 @@ private: SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const; SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; @@ -67,6 +70,9 @@ private: SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue 
performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performCtlzCombine(SDLoc SL, SDValue Cond, SDValue LHS, SDValue RHS, + DAGCombinerInfo &DCI) const; + SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const; protected: static EVT getEquivalentMemType(LLVMContext &Context, EVT VT); @@ -109,6 +115,8 @@ protected: SmallVectorImpl<ISD::InputArg> &OrigIns) const; void AnalyzeFormalArguments(CCState &State, const SmallVectorImpl<ISD::InputArg> &Ins) const; + void AnalyzeReturn(CCState &State, + const SmallVectorImpl<ISD::OutputArg> &Outs) const; public: AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI); @@ -263,6 +271,7 @@ enum NodeType : unsigned { BFE_I32, // Extract range of bits with sign extension to 32-bits. BFI, // (src0 & src1) | (~src0 & src2) BFM, // Insert a range of bits into a 32-bit word. + FFBH_U32, // ctlz with -1 if input is zero. MUL_U24, MUL_I24, MAD_U24, diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 70e589c..575dfe4 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -191,6 +191,8 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>; def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>; def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; +def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>; + // Signed and unsigned 24-bit multiply. The highest 8-bits are ignored when // performing the multiply. The result is a 32-bit value. def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp, @@ -240,4 +242,4 @@ def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChai // Call/Return DAG Nodes //===----------------------------------------------------------------------===// def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 22f85b3..b1be619 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -66,8 +66,12 @@ static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { } static MachineSchedRegistry -SchedCustomRegistry("r600", "Run R600's custom scheduler", - createR600MachineScheduler); +R600SchedRegistry("r600", "Run R600's custom scheduler", + createR600MachineScheduler); + +static MachineSchedRegistry +SISchedRegistry("si", "Run SI's custom scheduler", + createSIMachineScheduler); static std::string computeDataLayout(const Triple &TT) { std::string Ret = "e-p:32:32"; diff --git a/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td index 779a14e..2245f14 100644 --- a/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td @@ -349,7 +349,7 @@ def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>; def ADDC_UINT : R600_2OP_Helper <0x52, "ADDC_UINT", AMDGPUcarry>; def SUBB_UINT : R600_2OP_Helper <0x53, "SUBB_UINT", AMDGPUborrow>; -def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", ctlz_zero_undef, VecALU>; +def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", AMDGPUffbh_u32, VecALU>; def FFBL_INT : R600_1OP_Helper <0xAC,
"FFBL_INT", cttz_zero_undef, VecALU>; let hasSideEffects = 1 in { diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 68b1d1a..4bc80a0 100644 --- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -28,7 +28,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() { //===--- Global Variable Emission Directives --------------------------===// HasAggressiveSymbolFolding = true; COMMDirectiveAlignmentIsInBytes = false; - HasDotTypeDotSizeDirective = false; HasNoDeadStrip = true; WeakRefDirective = ".weakref\t"; //===--- Dwarf Emission Directives -----------------------------------===// diff --git a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h index 7f79dd3..aa1e352 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h @@ -137,7 +137,7 @@ namespace SIOutMods { #define C_00B84C_EXCP_EN #define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC - +#define R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0 #define R_00B848_COMPUTE_PGM_RSRC1 0x00B848 #define S_00B848_VGPRS(x) (((x) & 0x3F) << 0) diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 96e37c5..f59d994 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -215,7 +215,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) { unsigned SrcReg = MI.getOperand(I).getReg(); - unsigned SrcSubReg = MI.getOperand(I).getReg(); + unsigned SrcSubReg = MI.getOperand(I).getSubReg(); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); assert(TRI->isSGPRClass(SrcRC) && diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 02a3930..6230d1e 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -334,12 +334,20 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { !MRI.hasOneUse(MI.getOperand(0).getReg())) continue; - // FIXME: Fold operands with subregs. if (OpToFold.isReg() && - (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) || - OpToFold.getSubReg())) + !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg())) continue; + // Prevent folding operands backwards in the function. For example, + // the COPY opcode must not be replaced by 1 in this example: + // + // %vreg3<def> = COPY %VGPR0; VGPR_32:%vreg3 + // ... 
+ // %VGPR0<def> = V_MOV_B32_e32 1, %EXEC<imp-use> + MachineOperand &Dst = MI.getOperand(0); + if (Dst.isReg() && + !TargetRegisterInfo::isVirtualRegister(Dst.getReg())) + continue; // We need to mutate the operands of new mov instructions to add implicit // uses of EXEC, but adding them invalidates the use_iterator, so defer diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 0e043cb..5448675 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -259,7 +259,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setTargetDAGCombine(ISD::SMAX); setTargetDAGCombine(ISD::UMIN); setTargetDAGCombine(ISD::UMAX); - setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); @@ -598,18 +597,20 @@ SDValue SITargetLowering::LowerFormalArguments( // First check if it's a PS input addr if (Info->getShaderType() == ShaderType::PIXEL && !Arg.Flags.isInReg() && - !Arg.Flags.isByVal()) { + !Arg.Flags.isByVal() && PSInputNum <= 15) { - assert((PSInputNum <= 15) && "Too many PS inputs!"); - - if (!Arg.Used) { + if (!Arg.Used && !Info->isPSInputAllocated(PSInputNum)) { // We can safely skip PS inputs Skipped.set(i); ++PSInputNum; continue; } - Info->PSInputAddr |= 1 << PSInputNum++; + Info->markPSInputAllocated(PSInputNum); + if (Arg.Used) + Info->PSInputEna |= 1 << PSInputNum; + + ++PSInputNum; } // Second split vertices into their elements @@ -639,11 +640,25 @@ SDValue SITargetLowering::LowerFormalArguments( *DAG.getContext()); // At least one interpolation mode must be enabled or else the GPU will hang. + // + // Check PSInputAddr instead of PSInputEna. The idea is that if the user set + // PSInputAddr, the user wants to enable some bits after the compilation + // based on run-time states. Since we can't know what the final PSInputEna + // will look like, we shouldn't do anything here and the user should take + // responsibility for the correct programming. + // + // Otherwise, the following restrictions apply: + // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled. + // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be + // enabled too. if (Info->getShaderType() == ShaderType::PIXEL && - (Info->PSInputAddr & 0x7F) == 0) { - Info->PSInputAddr |= 1; + ((Info->getPSInputAddr() & 0x7F) == 0 || + ((Info->getPSInputAddr() & 0xF) == 0 && + Info->isPSInputAllocated(11)))) { CCInfo.AllocateReg(AMDGPU::VGPR0); CCInfo.AllocateReg(AMDGPU::VGPR1); + Info->markPSInputAllocated(0); + Info->PSInputEna |= 1; } if (Info->getShaderType() == ShaderType::COMPUTE) { @@ -872,6 +887,97 @@ SDValue SITargetLowering::LowerFormalArguments( return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } +SDValue SITargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + SDLoc DL, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + + if (Info->getShaderType() == ShaderType::COMPUTE) + return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs, + OutVals, DL, DAG); + + Info->setIfReturnsVoid(Outs.size() == 0); + + SmallVector<ISD::OutputArg, 48> Splits; + SmallVector<SDValue, 48> SplitVals; + + // Split vectors into their elements.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + const ISD::OutputArg &Out = Outs[i]; + + if (Out.VT.isVector()) { + MVT VT = Out.VT.getVectorElementType(); + ISD::OutputArg NewOut = Out; + NewOut.Flags.setSplit(); + NewOut.VT = VT; + + // We want the original number of vector elements here, e.g. + // three or five, not four or eight. + unsigned NumElements = Out.ArgVT.getVectorNumElements(); + + for (unsigned j = 0; j != NumElements; ++j) { + SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, OutVals[i], + DAG.getConstant(j, DL, MVT::i32)); + SplitVals.push_back(Elem); + Splits.push_back(NewOut); + NewOut.PartOffset += NewOut.VT.getStoreSize(); + } + } else { + SplitVals.push_back(OutVals[i]); + Splits.push_back(Out); + } + } + + // CCValAssign - represent the assignment of the return value to a location. + SmallVector<CCValAssign, 48> RVLocs; + + // CCState - Info about the registers and stack slots. + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + + // Analyze outgoing return values. + AnalyzeReturn(CCInfo, Splits); + + SDValue Flag; + SmallVector<SDValue, 48> RetOps; + RetOps.push_back(Chain); // Operand #0 = Chain (updated below) + + // Copy the result values into the output registers. + for (unsigned i = 0, realRVLocIdx = 0; + i != RVLocs.size(); + ++i, ++realRVLocIdx) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + SDValue Arg = SplitVals[realRVLocIdx]; + + // Copied from other backends. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); + break; + } + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag); + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + // Update chain and glue. 
+ RetOps[0] = Chain; + if (Flag.getNode()) + RetOps.push_back(Flag); + + return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, RetOps); +} + MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MachineInstr * MI, MachineBasicBlock * BB) const { @@ -1158,6 +1264,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, switch (IntrinsicID) { case Intrinsic::amdgcn_dispatch_ptr: + if (!Subtarget->isAmdHsaOS()) { + DiagnosticInfoUnsupported BadIntrin(*MF.getFunction(), + "hsa intrinsic without hsa target"); + DAG.getContext()->diagnose(BadIntrin); + return DAG.getUNDEF(VT); + } + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT); @@ -2027,7 +2140,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, case ISD::UINT_TO_FP: { return performUCharToFloatCombine(N, DCI); - + } case ISD::FADD: { if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) break; @@ -2109,7 +2222,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, break; } - } case ISD::LOAD: case ISD::STORE: case ISD::ATOMIC_LOAD: diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h index e2f8cb1..f01b2c0 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -95,6 +95,13 @@ public: SDLoc DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override; + SDValue LowerReturn(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + SDLoc DL, SelectionDAG &DAG) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, MachineBasicBlock * BB) const override; bool enableAggressiveFMAFusion(EVT VT) const override; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index 821aada..94e6147 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -84,6 +84,9 @@ private: bool LastInstWritesM0; + /// \brief Whether the machine function returns void + bool ReturnsVoid; + /// \brief Get increment/decrement amount for this instruction. Counters getHwCounts(MachineInstr &MI); @@ -322,7 +325,9 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, const Counters &Required) { // End of program? No need to wait on anything - if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM) + // A function not returning void needs to wait, because other bytecode will + // be appended after it and we don't know what it will be. + if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM && ReturnsVoid) return false; // Figure out if the async instructions execute in order @@ -465,6 +470,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { LastIssued = ZeroCounts; LastOpcodeType = OTHER; LastInstWritesM0 = false; + ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid(); memset(&UsedRegs, 0, sizeof(UsedRegs)); memset(&DefinedRegs, 0, sizeof(DefinedRegs)); @@ -488,6 +494,14 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { // Wait for everything at the end of the MBB Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued); + + // Functions returning something shouldn't contain S_ENDPGM, because other + // bytecode will be appended after it. 
+ if (!ReturnsVoid) { + MachineBasicBlock::iterator I = MBB.getFirstTerminator(); + if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM) + I->eraseFromParent(); + } } return Changes; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index a08a5a8..1e10d25 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1777,6 +1777,10 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, MRI.getRegClass(Reg) : RI.getPhysRegClass(Reg); + const SIRegisterInfo *TRI = + static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo()); + RC = TRI->getSubRegClass(RC, MO.getSubReg()); + // In order to be legal, the common sub-class must be equal to the // class of the current operand. For example: // @@ -3075,3 +3079,15 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const { return Rsrc23; } + +bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr *MI) const { + unsigned Opc = MI->getOpcode(); + + return isSMRD(Opc); +} + +bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr *MI) const { + unsigned Opc = MI->getOpcode(); + + return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc); +} diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 307ef67..cce1ae7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -462,6 +462,9 @@ public: uint64_t getDefaultRsrcDataFormat() const; uint64_t getScratchRsrcWords23() const; + + bool isLowLatencyInstruction(const MachineInstr *MI) const; + bool isHighLatencyInstruction(const MachineInstr *MI) const; }; namespace AMDGPU { diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td index b7df058..89692ab 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -144,7 +144,7 @@ defm S_FF1_I32_B32 : SOP1_32 <sop1<0x13, 0x10>, "s_ff1_i32_b32", defm S_FF1_I32_B64 : SOP1_32_64 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>; defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32", - [(set i32:$dst, (ctlz_zero_undef i32:$src0))] + [(set i32:$dst, (AMDGPUffbh_u32 i32:$src0))] >; defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index bf15516..49677fc 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -46,8 +46,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkGroupIDZSystemSGPR(AMDGPU::NoRegister), WorkGroupInfoSystemSGPR(AMDGPU::NoRegister), PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister), - LDSWaveSpillSize(0), PSInputAddr(0), + ReturnsVoid(true), + LDSWaveSpillSize(0), + PSInputEna(0), NumUserSGPRs(0), NumSystemSGPRs(0), HasSpilledSGPRs(false), @@ -72,6 +74,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); const Function *F = MF.getFunction(); + PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); + const MachineFrameInfo *FrameInfo = MF.getFrameInfo(); if (getShaderType() == ShaderType::COMPUTE) diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 
9c528d6..846ee5d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -57,10 +57,14 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction {
   unsigned WorkGroupInfoSystemSGPR;
   unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
 
+  // Graphics info.
+  unsigned PSInputAddr;
+  bool ReturnsVoid;
+
 public:
   // FIXME: Make private
   unsigned LDSWaveSpillSize;
-  unsigned PSInputAddr;
+  unsigned PSInputEna;
   std::map<unsigned, unsigned> LaneVGPRs;
   unsigned ScratchOffsetReg;
   unsigned NumUserSGPRs;
@@ -273,6 +277,26 @@ public:
     HasSpilledVGPRs = Spill;
   }
 
+  unsigned getPSInputAddr() const {
+    return PSInputAddr;
+  }
+
+  bool isPSInputAllocated(unsigned Index) const {
+    return PSInputAddr & (1 << Index);
+  }
+
+  void markPSInputAllocated(unsigned Index) {
+    PSInputAddr |= 1 << Index;
+  }
+
+  bool returnsVoid() const {
+    return ReturnsVoid;
+  }
+
+  void setIfReturnsVoid(bool Value) {
+    ReturnsVoid = Value;
+  }
+
   unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
 };
 
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
new file mode 100644
index 0000000..1cfa984
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -0,0 +1,1968 @@
+//===-- SIMachineScheduler.cpp - SI Scheduler Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#include "SIMachineScheduler.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+// This scheduler implements a different scheduling algorithm than
+// GenericScheduler.
+//
+// There are several specific architecture behaviours that can't be modelled
+// for GenericScheduler:
+// . When accessing the result of an SGPR load instruction, you have to wait
+//   for all the SGPR load instructions before your current instruction to
+//   have finished.
+// . When accessing the result of a VGPR load instruction, you have to wait
+//   for all the VGPR load instructions previous to the VGPR load instruction
+//   you are interested in to finish.
+// . The lower the register pressure, the better load latencies are hidden.
+//
+// Moreover some specificities (like the fact that a lot of instructions in
+// the shader have few dependencies) make the generic scheduler have some
+// unpredictable behaviours. For example when register pressure becomes high,
+// it can either manage to prevent register pressure from going too high, or
+// it can increase register pressure even more than if it hadn't taken
+// register pressure into account.
+//
+// Also some other bad behaviours are generated, like loading at the
+// beginning of the shader a constant into a VGPR that you won't need until
+// the end of the shader.
+//
+// The scheduling problem for SI can distinguish three main parts:
+// . Hiding high latencies (texture sampling, etc)
+// . Hiding low latencies (SGPR constant loading, etc)
+// . 
Keeping register usage low for better latency hiding and general
+//   performance
+//
+// Some other things can also affect performance, but are hard to predict
+// (cache usage, the fact the HW can issue several instructions from
+// different wavefronts of different types, etc)
+//
+// This scheduler tries to solve the scheduling problem by dividing it into
+// simpler sub-problems. It divides the instructions into blocks, schedules
+// locally inside the blocks where it takes care of low latencies, and then
+// chooses the order of the blocks by taking care of high latencies.
+// Dividing the instructions into blocks helps keep register usage under
+// control.
+//
+// First the instructions are put into blocks.
+// We want the blocks to help control register usage and hide high latencies
+// later. To help control register usage, we typically want all local
+// computations, when for example you create a result that can be consumed
+// right away, to be contained in a block. Block inputs and outputs would
+// typically be important results that are needed in several locations of
+// the shader. Since we do want blocks to help hide high latencies, we want
+// the instructions inside the block to have a minimal set of dependencies
+// on high latencies. This makes it easy to pick blocks to hide specific
+// high latencies.
+// The block creation algorithm is divided into several steps, and several
+// variants can be tried during the scheduling process.
+//
+// Second the order of the instructions inside the blocks is chosen.
+// At that step we take into account only register usage and the hiding of
+// low latency instructions.
+//
+// Third the block order is chosen; there we try to hide high latencies
+// and keep register usage low.
+//
+// After the third step, a pass is done to improve the hiding of low
+// latencies.
+//
+// Actually when talking about 'low latency' or 'high latency' it includes
+// both the latency for the cache (or global mem) data to reach the
+// register, and the bandwidth limitations.
+// Increasing the number of active wavefronts helps hide the former, but it
+// doesn't solve the latter, which is why, even if the wavefront count is
+// high, we have to try to have as many instructions hiding high latencies
+// as possible.
+// The OpenCL doc gives, for example, a latency of 400 cycles for a global
+// mem access, which is hidden by 10 instructions if the wavefront count
+// is 10.
+
+// Some figures taken from AMD docs:
+// Both texture and constant L1 caches are 4-way associative with 64-byte
+// lines.
+// Constant cache is shared with 4 CUs.
+// For texture sampling, the address generation unit receives 4 texture
+// addresses per cycle, thus we could expect texture sampling latency to be
+// equivalent to 4 instructions in the very best case (a VGPR is 64 work
+// items, instructions in a wavefront group are executed every 4 cycles),
+// or 16 instructions if the other wavefronts associated with the 3 other
+// VALUs of the CU do texture sampling too. (Don't take these figures too
+// seriously, as I'm not 100% sure of the computation.)
+// Data exports should get similar latency.
+// For constant loading, the cache is shared with 4 CUs.
+// The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
+// I guess if the other CUs don't read the cache, it can go up to 64B/cycle.
+// It means a simple s_buffer_load should take one instruction to hide, as
+// well as an s_buffer_loadx2 and potentially an s_buffer_loadx8 if on the
+// same cache line.
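// A back-of-the-envelope sketch of that latency-hiding arithmetic (toy
// helper, not part of this patch; assumes one instruction issued per
// wavefront every 4 cycles, as in the figures above):
//
//   static unsigned instructionsToHide(unsigned LatencyCycles,
//                                      unsigned WavefrontCount) {
//     const unsigned CyclesPerSlot = 4 * WavefrontCount; // cycles hidden
//                                                        // per instr. slot
//     return (LatencyCycles + CyclesPerSlot - 1) / CyclesPerSlot; // round up
//   }
//
// instructionsToHide(400, 10) == 10, matching the OpenCL figure above; with
// only 2 wavefronts the same load would need 50 covering instructions.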
+//
+// As of today the driver doesn't preload the constants in cache, thus the
+// first loads get extra latency. The doc says global memory access can be
+// 300-600 cycles. We do not specifically take that into account when
+// scheduling, as we expect the driver to be able to preload the constants
+// soon.
+
+
+// common code //
+
+#ifndef NDEBUG
+
+static const char *getReasonStr(SIScheduleCandReason Reason) {
+  switch (Reason) {
+  case NoCand: return "NOCAND";
+  case RegUsage: return "REGUSAGE";
+  case Latency: return "LATENCY";
+  case Successor: return "SUCCESSOR";
+  case Depth: return "DEPTH";
+  case NodeOrder: return "ORDER";
+  }
+  llvm_unreachable("Unknown reason!");
+}
+
+#endif
+
+static bool tryLess(int TryVal, int CandVal,
+                    SISchedulerCandidate &TryCand,
+                    SISchedulerCandidate &Cand,
+                    SIScheduleCandReason Reason) {
+  if (TryVal < CandVal) {
+    TryCand.Reason = Reason;
+    return true;
+  }
+  if (TryVal > CandVal) {
+    if (Cand.Reason > Reason)
+      Cand.Reason = Reason;
+    return true;
+  }
+  Cand.setRepeat(Reason);
+  return false;
+}
+
+static bool tryGreater(int TryVal, int CandVal,
+                       SISchedulerCandidate &TryCand,
+                       SISchedulerCandidate &Cand,
+                       SIScheduleCandReason Reason) {
+  if (TryVal > CandVal) {
+    TryCand.Reason = Reason;
+    return true;
+  }
+  if (TryVal < CandVal) {
+    if (Cand.Reason > Reason)
+      Cand.Reason = Reason;
+    return true;
+  }
+  Cand.setRepeat(Reason);
+  return false;
+}
+
+// SIScheduleBlock //
+
+void SIScheduleBlock::addUnit(SUnit *SU) {
+  NodeNum2Index[SU->NodeNum] = SUnits.size();
+  SUnits.push_back(SU);
+}
+
+#ifndef NDEBUG
+
+void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
+  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
+  dbgs() << '\n';
+}
+#endif
+
+void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
+                                          SISchedCandidate &TryCand) {
+  // Initialize the candidate if needed.
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
+    return;
+  }
+
+  if (Cand.SGPRUsage > 60 &&
+      tryLess(TryCand.SGPRUsage, Cand.SGPRUsage, TryCand, Cand, RegUsage))
+    return;
+
+  // Schedule low latency instructions as close to the top as possible.
+  // Order of priority is:
+  // . Low latency instructions which do not depend on other low latency
+  //   instructions we haven't waited for
+  // . Other instructions which do not depend on low latency instructions
+  //   we haven't waited for
+  // . Low latencies
+  // . All other instructions
+  // Goal is to get: low latency instructions - independent instructions
+  // - (possibly some more low latency instructions) - instructions that
+  // depend on the first low latency instructions.
+  // If the block contains a lot of constant loads, the SGPR usage can go
+  // quite high; going above the arbitrary limit of 60 encourages reusing
+  // the already loaded constants (in order to release some SGPRs) before
+  // loading more.
+  if (tryLess(TryCand.HasLowLatencyNonWaitedParent,
+              Cand.HasLowLatencyNonWaitedParent,
+              TryCand, Cand, SIScheduleCandReason::Depth))
+    return;
+
+  if (tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
+                 TryCand, Cand, SIScheduleCandReason::Depth))
+    return;
+
+  if (TryCand.IsLowLatency &&
+      tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
+              TryCand, Cand, SIScheduleCandReason::Depth))
+    return;
+
+  if (tryLess(TryCand.VGPRUsage, Cand.VGPRUsage, TryCand, Cand, RegUsage))
+    return;
+
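+  // A standalone sketch of how such a tryLess/tryGreater cascade composes
+  // (toy types and names, not from this patch). Each rule either decides
+  // (it sets TryCand.Reason, so the caller adopts TryCand) or falls through
+  // to the next, weaker criterion, ending with source order:
+  //
+  //   enum ToyReason { ToyNoCand, ToyRegUsage, ToyOrder };
+  //   struct ToyCand { ToyReason Reason = ToyNoCand; unsigned SGPR = 0;
+  //                    bool LowLat = false; unsigned Node = 0; };
+  //   static bool toyLess(int Try, int Cur, ToyCand &T, ToyCand &C,
+  //                       ToyReason R) {
+  //     if (Try < Cur) { T.Reason = R; return true; }    // strictly better
+  //     if (Try > Cur) { if (C.Reason > R) C.Reason = R; return true; }
+  //     return false;                                    // tie: fall through
+  //   }
+  //   static void toyPick(ToyCand &Cand, ToyCand &Try) {
+  //     if (Cand.Reason == ToyNoCand) { Try.Reason = ToyOrder; return; }
+  //     if (toyLess(Try.SGPR, Cand.SGPR, Try, Cand, ToyRegUsage)) return;
+  //     if (Try.LowLat && !Cand.LowLat) { Try.Reason = ToyOrder; return; }
+  //     if (Try.Node < Cand.Node) Try.Reason = ToyOrder; // source order last
+  //   }
+  //   // Caller adopts Try whenever Try.Reason != ToyNoCand after toyPick.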
+  // Fall through to original instruction order.
+  if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
+    TryCand.Reason = NodeOrder;
+  }
+}
+
+SUnit* SIScheduleBlock::pickNode() {
+  SISchedCandidate TopCand;
+
+  for (SUnit* SU : TopReadySUs) {
+    SISchedCandidate TryCand;
+    std::vector<unsigned> pressure;
+    std::vector<unsigned> MaxPressure;
+    // Predict register usage after this instruction.
+    TryCand.SU = SU;
+    TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
+    TryCand.SGPRUsage = pressure[DAG->getSGPRSetID()];
+    TryCand.VGPRUsage = pressure[DAG->getVGPRSetID()];
+    TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
+    TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
+    TryCand.HasLowLatencyNonWaitedParent =
+      HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
+    tryCandidateTopDown(TopCand, TryCand);
+    if (TryCand.Reason != NoCand)
+      TopCand.setBest(TryCand);
+  }
+
+  return TopCand.SU;
+}
+
+
+// Schedule something valid.
+void SIScheduleBlock::fastSchedule() {
+  TopReadySUs.clear();
+  if (Scheduled)
+    undoSchedule();
+
+  for (SUnit* SU : SUnits) {
+    if (!SU->NumPredsLeft)
+      TopReadySUs.push_back(SU);
+  }
+
+  while (!TopReadySUs.empty()) {
+    SUnit *SU = TopReadySUs[0];
+    ScheduledSUnits.push_back(SU);
+    nodeScheduled(SU);
+  }
+
+  Scheduled = true;
+}
+
+// Returns true if the register was defined between First and Last.
+static bool isDefBetween(unsigned Reg,
+                         SlotIndex First, SlotIndex Last,
+                         const MachineRegisterInfo *MRI,
+                         const LiveIntervals *LIS) {
+  for (MachineRegisterInfo::def_instr_iterator
+       UI = MRI->def_instr_begin(Reg),
+       UE = MRI->def_instr_end(); UI != UE; ++UI) {
+    const MachineInstr* MI = &*UI;
+    if (MI->isDebugValue())
+      continue;
+    SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot();
+    if (InstSlot >= First && InstSlot <= Last)
+      return true;
+  }
+  return false;
+}
+
+void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
+                                      MachineBasicBlock::iterator EndBlock) {
+  IntervalPressure Pressure, BotPressure;
+  RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
+  LiveIntervals *LIS = DAG->getLIS();
+  MachineRegisterInfo *MRI = DAG->getMRI();
+  DAG->initRPTracker(TopRPTracker);
+  DAG->initRPTracker(BotRPTracker);
+  DAG->initRPTracker(RPTracker);
+
+  // Goes through all SUs. RPTracker captures what had to be alive for the
+  // SUs to execute, and what is still alive at the end.
+  for (SUnit* SU : ScheduledSUnits) {
+    RPTracker.setPos(SU->getInstr());
+    RPTracker.advance();
+  }
+
+  // Close the RPTracker to finalize live ins/outs.
+  RPTracker.closeRegion();
+
+  // Initialize the live ins and live outs.
+  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
+  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
+
+  // Do not track physical registers, because it messes up the tracking.
+  for (unsigned Reg : RPTracker.getPressure().LiveInRegs) {
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      LiveInRegs.insert(Reg);
+  }
+  LiveOutRegs.clear();
+  // There are several possibilities to distinguish:
+  // 1) Reg is not input to any instruction in the block, but is output of one
+  // 2) 1) + read in the block and not needed after it
+  // 3) 1) + read in the block but needed in another block
+  // 4) Reg is input of an instruction but another block will read it too
+  // 5) Reg is input of an instruction and then rewritten in the block.
+  //    result is not read in the block (implies used in another block)
+  // 6) Reg is input of an instruction and then rewritten in the block.
+  //    result is read in the block and not needed in another block
+  // 7) Reg is input of an instruction and then rewritten in the block.
+  //    result is read in the block but also needed in another block
+  // LiveInRegs will contain all the regs in situations 4, 5, 6, 7.
+  // We want LiveOutRegs to contain only Regs whose content will be read
+  // after in another block, and whose content was written in the current
+  // block, that is, we want it to contain 1, 3, 5, 7.
+  // Since we packed all the MIs of a block together before scheduling, the
+  // LiveIntervals were correct, and the RPTracker was able to correctly
+  // handle 5 vs 6 and 2 vs 3.
+  // (Note: this is not sufficient to keep the RPTracker from making
+  // mistakes for case 4.)
+  // The RPTracker's LiveOutRegs has 1, 3, (some correct and some incorrect)
+  // 4, 5, 7.
+  // Comparing to LiveInRegs is not sufficient to differentiate 4 from 5
+  // and 7.
+  // The use of isDefBetween removes case 4.
+  for (unsigned Reg : RPTracker.getPressure().LiveOutRegs) {
+    if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+        isDefBetween(Reg, LIS->getInstructionIndex(BeginBlock).getRegSlot(),
+                     LIS->getInstructionIndex(EndBlock).getRegSlot(),
+                     MRI, LIS)) {
+      LiveOutRegs.insert(Reg);
+    }
+  }
+
+  // Pressure = sum_alive_registers register size
+  // Internally LLVM will represent some registers as big 128-bit registers,
+  // for example, but they actually correspond to 4 actual 32-bit registers.
+  // Thus Pressure is not equal to num_alive_registers * constant.
+  LiveInPressure = TopPressure.MaxSetPressure;
+  LiveOutPressure = BotPressure.MaxSetPressure;
+
+  // Prepares TopRPTracker for top down scheduling.
+  TopRPTracker.closeTop();
+}
+
+void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
+                               MachineBasicBlock::iterator EndBlock) {
+  if (!Scheduled)
+    fastSchedule();
+
+  // PreScheduling phase to set LiveIn and LiveOut.
+  initRegPressure(BeginBlock, EndBlock);
+  undoSchedule();
+
+  // Schedule for real now.
+
+  TopReadySUs.clear();
+
+  for (SUnit* SU : SUnits) {
+    if (!SU->NumPredsLeft)
+      TopReadySUs.push_back(SU);
+  }
+
+  while (!TopReadySUs.empty()) {
+    SUnit *SU = pickNode();
+    ScheduledSUnits.push_back(SU);
+    TopRPTracker.setPos(SU->getInstr());
+    TopRPTracker.advance();
+    nodeScheduled(SU);
+  }
+
+  // TODO: compute InternalAdditionnalPressure.
+  InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());
+
+  // Check everything is right.
+#ifndef NDEBUG
+  assert(SUnits.size() == ScheduledSUnits.size() &&
+         TopReadySUs.empty());
+  for (SUnit* SU : SUnits) {
+    assert(SU->isScheduled &&
+           SU->NumPredsLeft == 0);
+  }
+#endif
+
+  Scheduled = true;
+}
+
+void SIScheduleBlock::undoSchedule() {
+  for (SUnit* SU : SUnits) {
+    SU->isScheduled = false;
+    for (SDep& Succ : SU->Succs) {
+      if (BC->isSUInBlock(Succ.getSUnit(), ID))
+        undoReleaseSucc(SU, &Succ);
+    }
+  }
+  HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
+  ScheduledSUnits.clear();
+  Scheduled = false;
+}
+
+void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+  SUnit *SuccSU = SuccEdge->getSUnit();
+
+  if (SuccEdge->isWeak()) {
+    ++SuccSU->WeakPredsLeft;
+    return;
+  }
+  ++SuccSU->NumPredsLeft;
+}
+
+void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+  SUnit *SuccSU = SuccEdge->getSUnit();
+
+  if (SuccEdge->isWeak()) {
+    --SuccSU->WeakPredsLeft;
+    return;
+  }
+#ifndef NDEBUG
+  if (SuccSU->NumPredsLeft == 0) {
+    dbgs() << "*** Scheduling failed!
***\n"; + SuccSU->dump(DAG); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(nullptr); + } +#endif + + --SuccSU->NumPredsLeft; +} + +/// Release Successors of the SU that are in the block or not. +void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) { + for (SDep& Succ : SU->Succs) { + SUnit *SuccSU = Succ.getSUnit(); + + if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock) + continue; + + releaseSucc(SU, &Succ); + if (SuccSU->NumPredsLeft == 0 && InOrOutBlock) + TopReadySUs.push_back(SuccSU); + } +} + +void SIScheduleBlock::nodeScheduled(SUnit *SU) { + // Is in TopReadySUs + assert (!SU->NumPredsLeft); + std::vector<SUnit*>::iterator I = + std::find(TopReadySUs.begin(), TopReadySUs.end(), SU); + if (I == TopReadySUs.end()) { + dbgs() << "Data Structure Bug in SI Scheduler\n"; + llvm_unreachable(nullptr); + } + TopReadySUs.erase(I); + + releaseSuccessors(SU, true); + // Scheduling this node will trigger a wait, + // thus propagate to other instructions that they do not need to wait either. + if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]]) + HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0); + + if (DAG->IsLowLatencySU[SU->NodeNum]) { + for (SDep& Succ : SU->Succs) { + std::map<unsigned, unsigned>::iterator I = + NodeNum2Index.find(Succ.getSUnit()->NodeNum); + if (I != NodeNum2Index.end()) + HasLowLatencyNonWaitedParent[I->second] = 1; + } + } + SU->isScheduled = true; +} + +void SIScheduleBlock::finalizeUnits() { + // We remove links from outside blocks to enable scheduling inside the block. + for (SUnit* SU : SUnits) { + releaseSuccessors(SU, false); + if (DAG->IsHighLatencySU[SU->NodeNum]) + HighLatencyBlock = true; + } + HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0); +} + +// we maintain ascending order of IDs +void SIScheduleBlock::addPred(SIScheduleBlock *Pred) { + unsigned PredID = Pred->getID(); + + // Check if not already predecessor. + for (SIScheduleBlock* P : Preds) { + if (PredID == P->getID()) + return; + } + Preds.push_back(Pred); + +#ifndef NDEBUG + for (SIScheduleBlock* S : Succs) { + if (PredID == S->getID()) + assert(!"Loop in the Block Graph!\n"); + } +#endif +} + +void SIScheduleBlock::addSucc(SIScheduleBlock *Succ) { + unsigned SuccID = Succ->getID(); + + // Check if not already predecessor. 
+  for (SIScheduleBlock* S : Succs) {
+    if (SuccID == S->getID())
+      return;
+  }
+  if (Succ->isHighLatencyBlock())
+    ++NumHighLatencySuccessors;
+  Succs.push_back(Succ);
+#ifndef NDEBUG
+  for (SIScheduleBlock* P : Preds) {
+    if (SuccID == P->getID())
+      assert(!"Loop in the Block Graph!\n");
+  }
+#endif
+}
+
+#ifndef NDEBUG
+void SIScheduleBlock::printDebug(bool full) {
+  dbgs() << "Block (" << ID << ")\n";
+  if (!full)
+    return;
+
+  dbgs() << "\nContains High Latency Instruction: "
+         << HighLatencyBlock << '\n';
+  dbgs() << "\nDepends On:\n";
+  for (SIScheduleBlock* P : Preds) {
+    P->printDebug(false);
+  }
+
+  dbgs() << "\nSuccessors:\n";
+  for (SIScheduleBlock* S : Succs) {
+    S->printDebug(false);
+  }
+
+  if (Scheduled) {
+    dbgs() << "LiveInPressure " << LiveInPressure[DAG->getSGPRSetID()] << ' '
+           << LiveInPressure[DAG->getVGPRSetID()] << '\n';
+    dbgs() << "LiveOutPressure " << LiveOutPressure[DAG->getSGPRSetID()] << ' '
+           << LiveOutPressure[DAG->getVGPRSetID()] << "\n\n";
+    dbgs() << "LiveIns:\n";
+    for (unsigned Reg : LiveInRegs)
+      dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';
+
+    dbgs() << "\nLiveOuts:\n";
+    for (unsigned Reg : LiveOutRegs)
+      dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';
+  }
+
+  // The dump is the same whether or not the block has been scheduled.
+  dbgs() << "\nInstructions:\n";
+  for (SUnit* SU : SUnits) {
+    SU->dump(DAG);
+  }
+
+  dbgs() << "///////////////////////\n";
+}
+
+#endif
+
+// SIScheduleBlockCreator //
+
+SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG) :
+DAG(DAG) {
+}
+
+SIScheduleBlockCreator::~SIScheduleBlockCreator() {
+}
+
+SIScheduleBlocks
+SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
+  std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
+    Blocks.find(BlockVariant);
+  if (B == Blocks.end()) {
+    SIScheduleBlocks Res;
+    createBlocksForVariant(BlockVariant);
+    topologicalSort();
+    scheduleInsideBlocks();
+    fillStats();
+    Res.Blocks = CurrentBlocks;
+    Res.TopDownIndex2Block = TopDownIndex2Block;
+    Res.TopDownBlock2Index = TopDownBlock2Index;
+    Blocks[BlockVariant] = Res;
+    return Res;
+  } else {
+    return B->second;
+  }
+}
+
+bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
+  if (SU->NodeNum >= DAG->SUnits.size())
+    return false;
+  return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
+}
+
+void SIScheduleBlockCreator::colorHighLatenciesAlone() {
+  unsigned DAGSize = DAG->SUnits.size();
+
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &DAG->SUnits[i];
+    if (DAG->IsHighLatencySU[SU->NodeNum]) {
+      CurrentColoring[SU->NodeNum] = NextReservedID++;
+    }
+  }
+}
+
+void SIScheduleBlockCreator::colorHighLatenciesGroups() {
+  unsigned DAGSize = DAG->SUnits.size();
+  unsigned NumHighLatencies = 0;
+  unsigned GroupSize;
+  unsigned Color = NextReservedID;
+  unsigned Count = 0;
+  std::set<unsigned> FormingGroup;
+
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &DAG->SUnits[i];
+    if (DAG->IsHighLatencySU[SU->NodeNum])
+      ++NumHighLatencies;
+  }
+
+  if (NumHighLatencies == 0)
+    return;
+
+  if (NumHighLatencies <= 6)
+    GroupSize = 2;
+  else if (NumHighLatencies <= 12)
+    GroupSize = 3;
+  else
+    GroupSize = 4;
+
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &DAG->SUnits[i];
+    if (DAG->IsHighLatencySU[SU->NodeNum]) {
+      bool CompatibleGroup = true;
+      unsigned ProposedColor = Color;
+      for (unsigned j : FormingGroup) {
+        // TODO: Currently CompatibleGroup will always be false,
+        // because the graph enforces the load order. This can be
+        // fixed, but since keeping the load order is often good
+        // for performance, breaking it causes a performance hit
+        // (with both the default scheduler and this scheduler).
+        // When this scheduler can determine a good load order,
+        // this can be fixed.
+        if (!DAG->canAddEdge(SU, &DAG->SUnits[j]) ||
+            !DAG->canAddEdge(&DAG->SUnits[j], SU))
+          CompatibleGroup = false;
+      }
+      if (!CompatibleGroup || ++Count == GroupSize) {
+        FormingGroup.clear();
+        Color = ++NextReservedID;
+        if (!CompatibleGroup) {
+          ProposedColor = Color;
+          FormingGroup.insert(SU->NodeNum);
+        }
+        Count = 0;
+      } else {
+        FormingGroup.insert(SU->NodeNum);
+      }
+      CurrentColoring[SU->NodeNum] = ProposedColor;
+    }
+  }
+}
+
+void SIScheduleBlockCreator::colorComputeReservedDependencies() {
+  unsigned DAGSize = DAG->SUnits.size();
+  std::map<std::set<unsigned>, unsigned> ColorCombinations;
+
+  CurrentTopDownReservedDependencyColoring.clear();
+  CurrentBottomUpReservedDependencyColoring.clear();
+
+  CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
+  CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
+
+  // Traverse TopDown, and give different colors to SUs depending
+  // on which combination of High Latencies they depend on.
+
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &DAG->SUnits[DAG->TopDownIndex2SU[i]];
+    std::set<unsigned> SUColors;
+
+    // Already given.
+    if (CurrentColoring[SU->NodeNum]) {
+      CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
+        CurrentColoring[SU->NodeNum];
+      continue;
+    }
+
+    for (SDep& PredDep : SU->Preds) {
+      SUnit *Pred = PredDep.getSUnit();
+      if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
+        continue;
+      if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
+        SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
+    }
+    // Color 0 by default.
+    if (SUColors.empty())
+      continue;
+    // Same color as parents.
+    if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
+      CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
+        *SUColors.begin();
+    else {
+      std::map<std::set<unsigned>, unsigned>::iterator Pos =
+        ColorCombinations.find(SUColors);
+      if (Pos != ColorCombinations.end()) {
+        CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
+      } else {
+        CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
+          NextNonReservedID;
+        ColorCombinations[SUColors] = NextNonReservedID++;
+      }
+    }
+  }
+
+  ColorCombinations.clear();
+
+  // Same as before, but BottomUp.
+
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+    std::set<unsigned> SUColors;
+
+    // Already given.
+    if (CurrentColoring[SU->NodeNum]) {
+      CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
+        CurrentColoring[SU->NodeNum];
+      continue;
+    }
+
+    for (SDep& SuccDep : SU->Succs) {
+      SUnit *Succ = SuccDep.getSUnit();
+      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+        continue;
+      if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
+        SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
+    }
+    // Keep color 0.
+    if (SUColors.empty())
+      continue;
+    // Same color as parents.
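+    // The ColorCombinations pattern used in this function is plain
+    // set-interning: each distinct set of parent colors gets one fresh id.
+    // A minimal standalone sketch (hypothetical helper, not part of this
+    // patch):
+    //
+    //   #include <map>
+    //   #include <set>
+    //   static unsigned internColorSet(
+    //       std::map<std::set<unsigned>, unsigned> &Combos,
+    //       const std::set<unsigned> &Colors, unsigned &NextID) {
+    //     auto It = Combos.find(Colors);
+    //     if (It != Combos.end())
+    //       return It->second;     // combination already seen: reuse it
+    //     Combos[Colors] = NextID; // new combination: fresh color
+    //     return NextID++;
+    //   }
+    //
+    // Two SUs whose predecessors carry exactly the colors {2, 5} thus land
+    // in the same group, while {2} or {2, 5, 9} start groups of their own.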
+ if (SUColors.size() == 1 && *SUColors.begin() > DAGSize) + CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = + *SUColors.begin(); + else { + std::map<std::set<unsigned>, unsigned>::iterator Pos = + ColorCombinations.find(SUColors); + if (Pos != ColorCombinations.end()) { + CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second; + } else { + CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = + NextNonReservedID; + ColorCombinations[SUColors] = NextNonReservedID++; + } + } + } +} + +void SIScheduleBlockCreator::colorAccordingToReservedDependencies() { + unsigned DAGSize = DAG->SUnits.size(); + std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations; + + // Every combination of colors given by the top down + // and bottom up Reserved node dependency + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[i]; + std::pair<unsigned, unsigned> SUColors; + + // High latency instructions: already given. + if (CurrentColoring[SU->NodeNum]) + continue; + + SUColors.first = CurrentTopDownReservedDependencyColoring[SU->NodeNum]; + SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->NodeNum]; + + std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos = + ColorCombinations.find(SUColors); + if (Pos != ColorCombinations.end()) { + CurrentColoring[SU->NodeNum] = Pos->second; + } else { + CurrentColoring[SU->NodeNum] = NextNonReservedID; + ColorCombinations[SUColors] = NextNonReservedID++; + } + } +} + +void SIScheduleBlockCreator::colorEndsAccordingToDependencies() { + unsigned DAGSize = DAG->SUnits.size(); + std::vector<int> PendingColoring = CurrentColoring; + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; + std::set<unsigned> SUColors; + std::set<unsigned> SUColorsPending; + + if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) + continue; + + if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 || + CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0) + continue; + + for (SDep& SuccDep : SU->Succs) { + SUnit *Succ = SuccDep.getSUnit(); + if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) + continue; + if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 || + CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0) + SUColors.insert(CurrentColoring[Succ->NodeNum]); + SUColorsPending.insert(PendingColoring[Succ->NodeNum]); + } + if (SUColors.size() == 1 && SUColorsPending.size() == 1) + PendingColoring[SU->NodeNum] = *SUColors.begin(); + else // TODO: Attribute new colors depending on color + // combination of children. 
+ PendingColoring[SU->NodeNum] = NextNonReservedID++; + } + CurrentColoring = PendingColoring; +} + + +void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() { + unsigned DAGSize = DAG->SUnits.size(); + unsigned PreviousColor; + std::set<unsigned> SeenColors; + + if (DAGSize <= 1) + return; + + PreviousColor = CurrentColoring[0]; + + for (unsigned i = 1, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[i]; + unsigned CurrentColor = CurrentColoring[i]; + unsigned PreviousColorSave = PreviousColor; + assert(i == SU->NodeNum); + + if (CurrentColor != PreviousColor) + SeenColors.insert(PreviousColor); + PreviousColor = CurrentColor; + + if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) + continue; + + if (SeenColors.find(CurrentColor) == SeenColors.end()) + continue; + + if (PreviousColorSave != CurrentColor) + CurrentColoring[i] = NextNonReservedID++; + else + CurrentColoring[i] = CurrentColoring[i-1]; + } +} + +void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() { + unsigned DAGSize = DAG->SUnits.size(); + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; + std::set<unsigned> SUColors; + + if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) + continue; + + // No predecessor: Vgpr constant loading. + // Low latency instructions usually have a predecessor (the address) + if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum]) + continue; + + for (SDep& SuccDep : SU->Succs) { + SUnit *Succ = SuccDep.getSUnit(); + if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) + continue; + SUColors.insert(CurrentColoring[Succ->NodeNum]); + } + if (SUColors.size() == 1) + CurrentColoring[SU->NodeNum] = *SUColors.begin(); + } +} + +void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() { + unsigned DAGSize = DAG->SUnits.size(); + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; + std::set<unsigned> SUColors; + + if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) + continue; + + for (SDep& SuccDep : SU->Succs) { + SUnit *Succ = SuccDep.getSUnit(); + if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) + continue; + SUColors.insert(CurrentColoring[Succ->NodeNum]); + } + if (SUColors.size() == 1) + CurrentColoring[SU->NodeNum] = *SUColors.begin(); + } +} + +void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() { + unsigned DAGSize = DAG->SUnits.size(); + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; + std::set<unsigned> SUColors; + + if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) + continue; + + for (SDep& SuccDep : SU->Succs) { + SUnit *Succ = SuccDep.getSUnit(); + if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) + continue; + SUColors.insert(CurrentColoring[Succ->NodeNum]); + } + if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize) + CurrentColoring[SU->NodeNum] = *SUColors.begin(); + } +} + +void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() { + unsigned DAGSize = DAG->SUnits.size(); + std::map<unsigned, unsigned> ColorCount; + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; + unsigned color = CurrentColoring[SU->NodeNum]; + std::map<unsigned, unsigned>::iterator Pos = ColorCount.find(color); + if (Pos != ColorCount.end()) { + ++ColorCount[color]; + } else { + ColorCount[color] = 1; + } + } + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; + 
unsigned color = CurrentColoring[SU->NodeNum]; + std::set<unsigned> SUColors; + + if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) + continue; + + if (ColorCount[color] > 1) + continue; + + for (SDep& SuccDep : SU->Succs) { + SUnit *Succ = SuccDep.getSUnit(); + if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) + continue; + SUColors.insert(CurrentColoring[Succ->NodeNum]); + } + if (SUColors.size() == 1 && *SUColors.begin() != color) { + --ColorCount[color]; + CurrentColoring[SU->NodeNum] = *SUColors.begin(); + ++ColorCount[*SUColors.begin()]; + } + } +} + +void SIScheduleBlockCreator::cutHugeBlocks() { + // TODO +} + +void SIScheduleBlockCreator::regroupNoUserInstructions() { + unsigned DAGSize = DAG->SUnits.size(); + int GroupID = NextNonReservedID++; + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; + bool hasSuccessor = false; + + if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) + continue; + + for (SDep& SuccDep : SU->Succs) { + SUnit *Succ = SuccDep.getSUnit(); + if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) + continue; + hasSuccessor = true; + } + if (!hasSuccessor) + CurrentColoring[SU->NodeNum] = GroupID; + } +} + +void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) { + unsigned DAGSize = DAG->SUnits.size(); + std::map<unsigned,unsigned> RealID; + + CurrentBlocks.clear(); + CurrentColoring.clear(); + CurrentColoring.resize(DAGSize, 0); + Node2CurrentBlock.clear(); + + // Restore links previous scheduling variant has overridden. + DAG->restoreSULinksLeft(); + + NextReservedID = 1; + NextNonReservedID = DAGSize + 1; + + DEBUG(dbgs() << "Coloring the graph\n"); + + if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped) + colorHighLatenciesGroups(); + else + colorHighLatenciesAlone(); + colorComputeReservedDependencies(); + colorAccordingToReservedDependencies(); + colorEndsAccordingToDependencies(); + if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive) + colorForceConsecutiveOrderInGroup(); + regroupNoUserInstructions(); + colorMergeConstantLoadsNextGroup(); + colorMergeIfPossibleNextGroupOnlyForReserved(); + + // Put SUs of same color into same block + Node2CurrentBlock.resize(DAGSize, -1); + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[i]; + unsigned Color = CurrentColoring[SU->NodeNum]; + if (RealID.find(Color) == RealID.end()) { + int ID = CurrentBlocks.size(); + BlockPtrs.push_back( + make_unique<SIScheduleBlock>(DAG, this, ID)); + CurrentBlocks.push_back(BlockPtrs.rbegin()->get()); + RealID[Color] = ID; + } + CurrentBlocks[RealID[Color]]->addUnit(SU); + Node2CurrentBlock[SU->NodeNum] = RealID[Color]; + } + + // Build dependencies between blocks. + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[i]; + int SUID = Node2CurrentBlock[i]; + for (SDep& SuccDep : SU->Succs) { + SUnit *Succ = SuccDep.getSUnit(); + if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) + continue; + if (Node2CurrentBlock[Succ->NodeNum] != SUID) + CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]]); + } + for (SDep& PredDep : SU->Preds) { + SUnit *Pred = PredDep.getSUnit(); + if (PredDep.isWeak() || Pred->NodeNum >= DAGSize) + continue; + if (Node2CurrentBlock[Pred->NodeNum] != SUID) + CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]); + } + } + + // Free root and leafs of all blocks to enable scheduling inside them. 
+ for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { + SIScheduleBlock *Block = CurrentBlocks[i]; + Block->finalizeUnits(); + } + DEBUG( + dbgs() << "Blocks created:\n\n"; + for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { + SIScheduleBlock *Block = CurrentBlocks[i]; + Block->printDebug(true); + } + ); +} + +// Two functions taken from Codegen/MachineScheduler.cpp + +/// If this iterator is a debug value, increment until reaching the End or a +/// non-debug instruction. +static MachineBasicBlock::const_iterator +nextIfDebug(MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator End) { + for(; I != End; ++I) { + if (!I->isDebugValue()) + break; + } + return I; +} + +/// Non-const version. +static MachineBasicBlock::iterator +nextIfDebug(MachineBasicBlock::iterator I, + MachineBasicBlock::const_iterator End) { + // Cast the return value to nonconst MachineInstr, then cast to an + // instr_iterator, which does not check for null, finally return a + // bundle_iterator. + return MachineBasicBlock::instr_iterator( + const_cast<MachineInstr*>( + &*nextIfDebug(MachineBasicBlock::const_iterator(I), End))); +} + +void SIScheduleBlockCreator::topologicalSort() { + unsigned DAGSize = CurrentBlocks.size(); + std::vector<int> WorkList; + + DEBUG(dbgs() << "Topological Sort\n"); + + WorkList.reserve(DAGSize); + TopDownIndex2Block.resize(DAGSize); + TopDownBlock2Index.resize(DAGSize); + BottomUpIndex2Block.resize(DAGSize); + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SIScheduleBlock *Block = CurrentBlocks[i]; + unsigned Degree = Block->getSuccs().size(); + TopDownBlock2Index[i] = Degree; + if (Degree == 0) { + WorkList.push_back(i); + } + } + + int Id = DAGSize; + while (!WorkList.empty()) { + int i = WorkList.back(); + SIScheduleBlock *Block = CurrentBlocks[i]; + WorkList.pop_back(); + TopDownBlock2Index[i] = --Id; + TopDownIndex2Block[Id] = i; + for (SIScheduleBlock* Pred : Block->getPreds()) { + if (!--TopDownBlock2Index[Pred->getID()]) + WorkList.push_back(Pred->getID()); + } + } + +#ifndef NDEBUG + // Check correctness of the ordering. + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SIScheduleBlock *Block = CurrentBlocks[i]; + for (SIScheduleBlock* Pred : Block->getPreds()) { + assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && + "Wrong Top Down topological sorting"); + } + } +#endif + + BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(), + TopDownIndex2Block.rend()); +} + +void SIScheduleBlockCreator::scheduleInsideBlocks() { + unsigned DAGSize = CurrentBlocks.size(); + + DEBUG(dbgs() << "\nScheduling Blocks\n\n"); + + // We do schedule a valid scheduling such that a Block corresponds + // to a range of instructions. + DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n"); + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SIScheduleBlock *Block = CurrentBlocks[i]; + Block->fastSchedule(); + } + + // Note: the following code, and the part restoring previous position + // is by far the most expensive operation of the Scheduler. + + // Do not update CurrentTop. 
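+  // The two topological sorts in this file (over blocks above, over SUs
+  // later) follow the same worklist scheme: seed with zero-successor nodes,
+  // walk predecessor edges, hand out indices from the back. A standalone
+  // sketch over a plain adjacency list, where Preds[i] lists the
+  // predecessors of node i (toy code, not from this patch):
+  //
+  //   #include <vector>
+  //   static std::vector<int>
+  //   toyTopoOrder(const std::vector<std::vector<int>> &Preds) {
+  //     unsigned N = Preds.size();
+  //     std::vector<int> SuccCount(N, 0), Index(N, 0), WorkList;
+  //     for (const auto &Ps : Preds)
+  //       for (int P : Ps)
+  //         ++SuccCount[P];            // count outgoing edges per node
+  //     for (unsigned i = 0; i != N; ++i)
+  //       if (SuccCount[i] == 0)
+  //         WorkList.push_back(i);     // sinks are placed last
+  //     int Id = N;
+  //     while (!WorkList.empty()) {
+  //       int Node = WorkList.back();
+  //       WorkList.pop_back();
+  //       Index[Node] = --Id;
+  //       for (int P : Preds[Node])
+  //         if (--SuccCount[P] == 0)   // all successors placed:
+  //           WorkList.push_back(P);   // the predecessor is ready
+  //     }
+  //     return Index;                  // Index[pred] < Index[succ]
+  //   }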
+  MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
+  std::vector<MachineBasicBlock::iterator> PosOld;
+  std::vector<MachineBasicBlock::iterator> PosNew;
+  PosOld.reserve(DAG->SUnits.size());
+  PosNew.reserve(DAG->SUnits.size());
+
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    int BlockIndice = TopDownIndex2Block[i];
+    SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
+    std::vector<SUnit*> SUs = Block->getScheduledUnits();
+
+    for (SUnit* SU : SUs) {
+      MachineInstr *MI = SU->getInstr();
+      MachineBasicBlock::iterator Pos = MI;
+      PosOld.push_back(Pos);
+      if (&*CurrentTopFastSched == MI) {
+        PosNew.push_back(Pos);
+        CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
+                                          DAG->getCurrentBottom());
+      } else {
+        // Update the instruction stream.
+        DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);
+
+        // Update LiveIntervals.
+        // Note: Moving all instructions and calling handleMove every time
+        // is the most CPU-intensive operation of the scheduler.
+        // It would gain a lot if there were a way to recompute the
+        // LiveIntervals for the entire scheduling region.
+        DAG->getLIS()->handleMove(MI, /*UpdateFlags=*/true);
+        PosNew.push_back(CurrentTopFastSched);
+      }
+    }
+  }
+
+  // Now a Block of SUs is also a contiguous block of MIs.
+  // We do the final schedule for the instructions inside the block.
+  // The property that all the SUs of the Block are grouped together as MIs
+  // is used for correct reg usage tracking.
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SIScheduleBlock *Block = CurrentBlocks[i];
+    std::vector<SUnit*> SUs = Block->getScheduledUnits();
+    Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
+  }
+
+  DEBUG(dbgs() << "Restoring MI Pos\n");
+  // Restore old ordering (which prevents a LIS->handleMove bug).
+  for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
+    MachineBasicBlock::iterator POld = PosOld[i-1];
+    MachineBasicBlock::iterator PNew = PosNew[i-1];
+    if (PNew != POld) {
+      // Update the instruction stream.
+      DAG->getBB()->splice(POld, DAG->getBB(), PNew);
+
+      // Update LiveIntervals.
+ DAG->getLIS()->handleMove(POld, /*UpdateFlags=*/true); + } + } + + DEBUG( + for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { + SIScheduleBlock *Block = CurrentBlocks[i]; + Block->printDebug(true); + } + ); +} + +void SIScheduleBlockCreator::fillStats() { + unsigned DAGSize = CurrentBlocks.size(); + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + int BlockIndice = TopDownIndex2Block[i]; + SIScheduleBlock *Block = CurrentBlocks[BlockIndice]; + if (Block->getPreds().size() == 0) + Block->Depth = 0; + else { + unsigned Depth = 0; + for (SIScheduleBlock *Pred : Block->getPreds()) { + if (Depth < Pred->Depth + 1) + Depth = Pred->Depth + 1; + } + Block->Depth = Depth; + } + } + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + int BlockIndice = BottomUpIndex2Block[i]; + SIScheduleBlock *Block = CurrentBlocks[BlockIndice]; + if (Block->getSuccs().size() == 0) + Block->Height = 0; + else { + unsigned Height = 0; + for (SIScheduleBlock *Succ : Block->getSuccs()) { + if (Height < Succ->Height + 1) + Height = Succ->Height + 1; + } + Block->Height = Height; + } + } +} + +// SIScheduleBlockScheduler // + +SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, + SISchedulerBlockSchedulerVariant Variant, + SIScheduleBlocks BlocksStruct) : + DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks), + LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0), + SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) { + + // Fill the usage of every output + // Warning: while by construction we always have a link between two blocks + // when one needs a result from the other, the number of users of an output + // is not the sum of child blocks having as input the same virtual register. + // Here is an example. A produces x and y. B eats x and produces x'. + // C eats x' and y. The register coalescer may have attributed the same + // virtual register to x and x'. + // To count accurately, we do a topological sort. In case the register is + // found for several parents, we increment the usage of the one with the + // highest topological index. 
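+  // A standalone sketch of that counting rule (toy types, hypothetical
+  // names; not part of this patch): among the predecessors that output Reg,
+  // only the one with the highest topological index is charged with the
+  // use, so a register the coalescer shared between x and x' is not counted
+  // twice.
+  //
+  //   #include <map>
+  //   #include <set>
+  //   #include <vector>
+  //   struct ToyPred { int ID; int TopoIndex; std::set<unsigned> OutRegs; };
+  //   static void chargeLatestProducer(
+  //       std::vector<std::map<unsigned, unsigned>> &NumUsages,
+  //       const std::vector<ToyPred> &Preds, unsigned Reg) {
+  //     int Best = -1, BestTopo = -1;
+  //     for (const ToyPred &P : Preds)
+  //       if (P.OutRegs.count(Reg) && P.TopoIndex > BestTopo) {
+  //         Best = P.ID;          // the later producer is the live one
+  //         BestTopo = P.TopoIndex;
+  //       }
+  //     if (Best >= 0)
+  //       ++NumUsages[Best][Reg]; // operator[] zero-initializes new entries
+  //   }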
+ LiveOutRegsNumUsages.resize(Blocks.size()); + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + SIScheduleBlock *Block = Blocks[i]; + for (unsigned Reg : Block->getInRegs()) { + bool Found = false; + int topoInd = -1; + for (SIScheduleBlock* Pred: Block->getPreds()) { + std::set<unsigned> PredOutRegs = Pred->getOutRegs(); + std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg); + + if (RegPos != PredOutRegs.end()) { + Found = true; + if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) { + topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()]; + } + } + } + + if (!Found) + continue; + + int PredID = BlocksStruct.TopDownIndex2Block[topoInd]; + std::map<unsigned, unsigned>::iterator RegPos = + LiveOutRegsNumUsages[PredID].find(Reg); + if (RegPos != LiveOutRegsNumUsages[PredID].end()) { + ++LiveOutRegsNumUsages[PredID][Reg]; + } else { + LiveOutRegsNumUsages[PredID][Reg] = 1; + } + } + } + + LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0); + BlockNumPredsLeft.resize(Blocks.size()); + BlockNumSuccsLeft.resize(Blocks.size()); + + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + SIScheduleBlock *Block = Blocks[i]; + BlockNumPredsLeft[i] = Block->getPreds().size(); + BlockNumSuccsLeft[i] = Block->getSuccs().size(); + } + +#ifndef NDEBUG + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + SIScheduleBlock *Block = Blocks[i]; + assert(Block->getID() == i); + } +#endif + + std::set<unsigned> InRegs = DAG->getInRegs(); + addLiveRegs(InRegs); + + // Fill LiveRegsConsumers for regs that were already + // defined before scheduling. + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + SIScheduleBlock *Block = Blocks[i]; + for (unsigned Reg : Block->getInRegs()) { + bool Found = false; + for (SIScheduleBlock* Pred: Block->getPreds()) { + std::set<unsigned> PredOutRegs = Pred->getOutRegs(); + std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg); + + if (RegPos != PredOutRegs.end()) { + Found = true; + break; + } + } + + if (!Found) { + if (LiveRegsConsumers.find(Reg) == LiveRegsConsumers.end()) + LiveRegsConsumers[Reg] = 1; + else + ++LiveRegsConsumers[Reg]; + } + } + } + + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + SIScheduleBlock *Block = Blocks[i]; + if (BlockNumPredsLeft[i] == 0) { + ReadyBlocks.push_back(Block); + } + } + + while (SIScheduleBlock *Block = pickBlock()) { + BlocksScheduled.push_back(Block); + blockScheduled(Block); + } + + DEBUG( + dbgs() << "Block Order:"; + for (SIScheduleBlock* Block : BlocksScheduled) { + dbgs() << ' ' << Block->getID(); + } + ); +} + +bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand, + SIBlockSchedCandidate &TryCand) { + if (!Cand.isValid()) { + TryCand.Reason = NodeOrder; + return true; + } + + // Try to hide high latencies. + if (tryLess(TryCand.LastPosHighLatParentScheduled, + Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency)) + return true; + // Schedule high latencies early so you can hide them better. 
+  if (tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
+                 TryCand, Cand, Latency))
+    return true;
+  if (TryCand.IsHighLatency && tryGreater(TryCand.Height, Cand.Height,
+                                          TryCand, Cand, Depth))
+    return true;
+  if (tryGreater(TryCand.NumHighLatencySuccessors,
+                 Cand.NumHighLatencySuccessors,
+                 TryCand, Cand, Successor))
+    return true;
+  return false;
+}
+
+bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
+                                                    SIBlockSchedCandidate &TryCand) {
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
+    return true;
+  }
+
+  if (tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
+              TryCand, Cand, RegUsage))
+    return true;
+  if (tryGreater(TryCand.NumSuccessors > 0,
+                 Cand.NumSuccessors > 0,
+                 TryCand, Cand, Successor))
+    return true;
+  if (tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
+    return true;
+  if (tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
+              TryCand, Cand, RegUsage))
+    return true;
+  return false;
+}
+
+SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
+  SIBlockSchedCandidate Cand;
+  std::vector<SIScheduleBlock*>::iterator Best;
+  SIScheduleBlock *Block;
+  if (ReadyBlocks.empty())
+    return nullptr;
+
+  DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
+                        VregCurrentUsage, SregCurrentUsage);
+  if (VregCurrentUsage > maxVregUsage)
+    maxVregUsage = VregCurrentUsage;
+  if (SregCurrentUsage > maxSregUsage)
+    maxSregUsage = SregCurrentUsage;
+  DEBUG(
+    dbgs() << "Picking New Blocks\n";
+    dbgs() << "Available: ";
+    for (SIScheduleBlock* Block : ReadyBlocks)
+      dbgs() << Block->getID() << ' ';
+    dbgs() << "\nCurrent Live:\n";
+    for (unsigned Reg : LiveRegs)
+      dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';
+    dbgs() << '\n';
+    dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';
+    dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';
+  );
+
+  Cand.Block = nullptr;
+  for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
+       E = ReadyBlocks.end(); I != E; ++I) {
+    SIBlockSchedCandidate TryCand;
+    TryCand.Block = *I;
+    TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
+    TryCand.VGPRUsageDiff =
+      checkRegUsageImpact(TryCand.Block->getInRegs(),
+                          TryCand.Block->getOutRegs())[DAG->getVGPRSetID()];
+    TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
+    TryCand.NumHighLatencySuccessors =
+      TryCand.Block->getNumHighLatencySuccessors();
+    TryCand.LastPosHighLatParentScheduled =
+      (unsigned int) std::max<int> (0,
+         LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
+           LastPosWaitedHighLatency);
+    TryCand.Height = TryCand.Block->Height;
+    // Try not to increase VGPR usage too much, else we may spill.
+    if (VregCurrentUsage > 120 ||
+        Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
+      if (!tryCandidateRegUsage(Cand, TryCand) &&
+          Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
+        tryCandidateLatency(Cand, TryCand);
+    } else {
+      if (!tryCandidateLatency(Cand, TryCand))
+        tryCandidateRegUsage(Cand, TryCand);
+    }
+    if (TryCand.Reason != NoCand) {
+      Cand.setBest(TryCand);
+      Best = I;
+      DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' '
+                   << getReasonStr(Cand.Reason) << '\n');
+    }
+  }
+
+  DEBUG(
+    dbgs() << "Picking: " << Cand.Block->getID() << '\n';
+    dbgs() << "Is a block with high latency instruction: "
+           << (Cand.IsHighLatency ?
"yes\n" : "no\n"); + dbgs() << "Position of last high latency dependency: " + << Cand.LastPosHighLatParentScheduled << '\n'; + dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n'; + dbgs() << '\n'; + ); + + Block = Cand.Block; + ReadyBlocks.erase(Best); + return Block; +} + +// Tracking of currently alive registers to determine VGPR Usage. + +void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) { + for (unsigned Reg : Regs) { + // For now only track virtual registers. + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + // If not already in the live set, then add it. + (void) LiveRegs.insert(Reg); + } +} + +void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block, + std::set<unsigned> &Regs) { + for (unsigned Reg : Regs) { + // For now only track virtual registers. + std::set<unsigned>::iterator Pos = LiveRegs.find(Reg); + assert (Pos != LiveRegs.end() && // Reg must be live. + LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && + LiveRegsConsumers[Reg] >= 1); + --LiveRegsConsumers[Reg]; + if (LiveRegsConsumers[Reg] == 0) + LiveRegs.erase(Pos); + } +} + +void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) { + for (SIScheduleBlock* Block : Parent->getSuccs()) { + --BlockNumPredsLeft[Block->getID()]; + if (BlockNumPredsLeft[Block->getID()] == 0) { + ReadyBlocks.push_back(Block); + } + // TODO: Improve check. When the dependency between the high latency + // instructions and the instructions of the other blocks are WAR or WAW + // there will be no wait triggered. We would like these cases to not + // update LastPosHighLatencyParentScheduled. + if (Parent->isHighLatencyBlock()) + LastPosHighLatencyParentScheduled[Block->getID()] = NumBlockScheduled; + } +} + +void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) { + decreaseLiveRegs(Block, Block->getInRegs()); + addLiveRegs(Block->getOutRegs()); + releaseBlockSuccs(Block); + for (std::map<unsigned, unsigned>::iterator RegI = + LiveOutRegsNumUsages[Block->getID()].begin(), + E = LiveOutRegsNumUsages[Block->getID()].end(); RegI != E; ++RegI) { + std::pair<unsigned, unsigned> RegP = *RegI; + if (LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end()) + LiveRegsConsumers[RegP.first] = RegP.second; + else { + assert(LiveRegsConsumers[RegP.first] == 0); + LiveRegsConsumers[RegP.first] += RegP.second; + } + } + if (LastPosHighLatencyParentScheduled[Block->getID()] > + (unsigned)LastPosWaitedHighLatency) + LastPosWaitedHighLatency = + LastPosHighLatencyParentScheduled[Block->getID()]; + ++NumBlockScheduled; +} + +std::vector<int> +SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs, + std::set<unsigned> &OutRegs) { + std::vector<int> DiffSetPressure; + DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0); + + for (unsigned Reg : InRegs) { + // For now only track virtual registers. + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (LiveRegsConsumers[Reg] > 1) + continue; + PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg); + for (; PSetI.isValid(); ++PSetI) { + DiffSetPressure[*PSetI] -= PSetI.getWeight(); + } + } + + for (unsigned Reg : OutRegs) { + // For now only track virtual registers. 
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg); + for (; PSetI.isValid(); ++PSetI) { + DiffSetPressure[*PSetI] += PSetI.getWeight(); + } + } + + return DiffSetPressure; +} + +// SIScheduler // + +struct SIScheduleBlockResult +SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, + SISchedulerBlockSchedulerVariant ScheduleVariant) { + SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant); + SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks); + std::vector<SIScheduleBlock*> ScheduledBlocks; + struct SIScheduleBlockResult Res; + + ScheduledBlocks = Scheduler.getBlocks(); + + for (unsigned b = 0; b < ScheduledBlocks.size(); ++b) { + SIScheduleBlock *Block = ScheduledBlocks[b]; + std::vector<SUnit*> SUs = Block->getScheduledUnits(); + + for (SUnit* SU : SUs) + Res.SUs.push_back(SU->NodeNum); + } + + Res.MaxSGPRUsage = Scheduler.getSGPRUsage(); + Res.MaxVGPRUsage = Scheduler.getVGPRUsage(); + return Res; +} + +// SIScheduleDAGMI // + +SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) : + ScheduleDAGMILive(C, make_unique<GenericScheduler>(C)) { + SITII = static_cast<const SIInstrInfo*>(TII); + SITRI = static_cast<const SIRegisterInfo*>(TRI); + + VGPRSetID = SITRI->getVGPR32PressureSet(); + SGPRSetID = SITRI->getSGPR32PressureSet(); +} + +SIScheduleDAGMI::~SIScheduleDAGMI() { +} + +ScheduleDAGInstrs *llvm::createSIMachineScheduler(MachineSchedContext *C) { + return new SIScheduleDAGMI(C); +} + +// Code adapted from scheduleDAG.cpp +// Does a topological sort over the SUs. +// Both TopDown and BottomUp +void SIScheduleDAGMI::topologicalSort() { + std::vector<int> TopDownSU2Index; + unsigned DAGSize = SUnits.size(); + std::vector<SUnit*> WorkList; + + DEBUG(dbgs() << "Topological Sort\n"); + WorkList.reserve(DAGSize); + + TopDownIndex2SU.resize(DAGSize); + TopDownSU2Index.resize(DAGSize); + BottomUpIndex2SU.resize(DAGSize); + + WorkList.push_back(&getExitSU()); + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &SUnits[i]; + int NodeNum = SU->NodeNum; + unsigned Degree = SU->Succs.size(); + TopDownSU2Index[NodeNum] = Degree; + if (Degree == 0) { + assert(SU->Succs.empty() && "SUnit should have no successors"); + WorkList.push_back(SU); + } + } + + int Id = DAGSize; + while (!WorkList.empty()) { + SUnit *SU = WorkList.back(); + WorkList.pop_back(); + if (SU->NodeNum < DAGSize) { + TopDownSU2Index[SU->NodeNum] = --Id; + TopDownIndex2SU[Id] = SU->NodeNum; + } + for (SDep& Pred : SU->Preds) { + SUnit *SU = Pred.getSUnit(); + if (SU->NodeNum < DAGSize && !--TopDownSU2Index[SU->NodeNum]) + WorkList.push_back(SU); + } + } + + BottomUpIndex2SU = std::vector<int>(TopDownIndex2SU.rbegin(), + TopDownIndex2SU.rend()); + +#ifndef NDEBUG + // Check correctness of the ordering + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &SUnits[i]; + for (SDep& Pred : SU->Preds) { + if (Pred.getSUnit()->NodeNum >= DAGSize) + continue; + assert(TopDownSU2Index[SU->NodeNum] > + TopDownSU2Index[Pred.getSUnit()->NodeNum] && + "Wrong Top Down topological sorting"); + } + } + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &SUnits[i]; + for (SDep& Succ : SU->Succs) { + if (Succ.getSUnit()->NodeNum >= DAGSize) + continue; + assert(TopDownSU2Index[SU->NodeNum] < + TopDownSU2Index[Succ.getSUnit()->NodeNum] && + "Wrong Bottom Up topological sorting"); + } + } +#endif +} + +// Move low latencies further from their user without +// increasing SGPR usage (in general) +// 
This is to be replaced by a better pass that would
+// take into account SGPR usage (based on VGPR usage
+// and the corresponding wavefront count), that would
+// try to merge groups of loads if it makes sense, etc.
+void SIScheduleDAGMI::moveLowLatencies() {
+  unsigned DAGSize = SUnits.size();
+  int LastLowLatencyUser = -1;
+  int LastLowLatencyPos = -1;
+
+  for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
+    SUnit *SU = &SUnits[ScheduledSUnits[i]];
+    bool IsLowLatencyUser = false;
+    unsigned MinPos = 0;
+
+    for (SDep& PredDep : SU->Preds) {
+      SUnit *Pred = PredDep.getSUnit();
+      if (SITII->isLowLatencyInstruction(Pred->getInstr())) {
+        IsLowLatencyUser = true;
+      }
+      if (Pred->NodeNum >= DAGSize)
+        continue;
+      unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
+      if (PredPos >= MinPos)
+        MinPos = PredPos + 1;
+    }
+
+    if (SITII->isLowLatencyInstruction(SU->getInstr())) {
+      unsigned BestPos = LastLowLatencyUser + 1;
+      if ((int)BestPos <= LastLowLatencyPos)
+        BestPos = LastLowLatencyPos + 1;
+      if (BestPos < MinPos)
+        BestPos = MinPos;
+      if (BestPos < i) {
+        for (unsigned u = i; u > BestPos; --u) {
+          ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
+          ScheduledSUnits[u] = ScheduledSUnits[u-1];
+        }
+        ScheduledSUnits[BestPos] = SU->NodeNum;
+        ScheduledSUnitsInv[SU->NodeNum] = BestPos;
+      }
+      LastLowLatencyPos = BestPos;
+      if (IsLowLatencyUser)
+        LastLowLatencyUser = BestPos;
+    } else if (IsLowLatencyUser) {
+      LastLowLatencyUser = i;
+    // Also move the COPY instructions that the low latency
+    // instructions depend on.
+    } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
+      bool CopyForLowLat = false;
+      for (SDep& SuccDep : SU->Succs) {
+        SUnit *Succ = SuccDep.getSUnit();
+        if (SITII->isLowLatencyInstruction(Succ->getInstr())) {
+          CopyForLowLat = true;
+        }
+      }
+      if (!CopyForLowLat)
+        continue;
+      if (MinPos < i) {
+        for (unsigned u = i; u > MinPos; --u) {
+          ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
+          ScheduledSUnits[u] = ScheduledSUnits[u-1];
+        }
+        ScheduledSUnits[MinPos] = SU->NodeNum;
+        ScheduledSUnitsInv[SU->NodeNum] = MinPos;
+      }
+    }
+  }
+}
+
+void SIScheduleDAGMI::restoreSULinksLeft() {
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    SUnits[i].isScheduled = false;
+    SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
+    SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
+    SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
+    SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
+  }
+}
+
+// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
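(That per-register usage, here and in checkRegUsageImpact above, comes from
the MachineRegisterInfo pressure-set decomposition. A minimal sketch of the
idiom; the helper name is ours, not part of the patch:)

    // Accumulate the weight one virtual register contributes to each
    // register pressure set it belongs to.
    static void addRegToPressure(unsigned Reg, const MachineRegisterInfo &MRI,
                                 std::vector<unsigned> &Pressure) {
      if (!TargetRegisterInfo::isVirtualRegister(Reg))
        return; // Physical registers are not tracked here.
      for (PSetIterator PSetI = MRI.getPressureSets(Reg); PSetI.isValid();
           ++PSetI)
        Pressure[*PSetI] += PSetI.getWeight(); // One set index per iteration.
    }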
+template<typename _Iterator> void
+SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
+                                  unsigned &VgprUsage, unsigned &SgprUsage) {
+  VgprUsage = 0;
+  SgprUsage = 0;
+  for (_Iterator RegI = First; RegI != End; ++RegI) {
+    unsigned Reg = *RegI;
+    // For now only track virtual registers.
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    PSetIterator PSetI = MRI.getPressureSets(Reg);
+    for (; PSetI.isValid(); ++PSetI) {
+      if (*PSetI == VGPRSetID)
+        VgprUsage += PSetI.getWeight();
+      else if (*PSetI == SGPRSetID)
+        SgprUsage += PSetI.getWeight();
+    }
+  }
+}
+
+void SIScheduleDAGMI::schedule()
+{
+  SmallVector<SUnit*, 8> TopRoots, BotRoots;
+  SIScheduleBlockResult Best, Temp;
+  DEBUG(dbgs() << "Preparing Scheduling\n");
+
+  buildDAGWithRegPressure();
+  DEBUG(
+    for(SUnit& SU : SUnits)
+      SU.dumpAll(this)
+  );
+
+  Topo.InitDAGTopologicalSorting();
+  topologicalSort();
+  findRootsAndBiasEdges(TopRoots, BotRoots);
+  // We reuse several ScheduleDAGMI and ScheduleDAGMILive
+  // functions, but to make them happy we must initialize
+  // the default Scheduler implementation (even if we do not
+  // run it).
+  SchedImpl->initialize(this);
+  initQueues(TopRoots, BotRoots);
+
+  // Fill some stats to help scheduling.
+
+  SUnitsLinksBackup = SUnits;
+  IsLowLatencySU.clear();
+  LowLatencyOffset.clear();
+  IsHighLatencySU.clear();
+
+  IsLowLatencySU.resize(SUnits.size(), 0);
+  LowLatencyOffset.resize(SUnits.size(), 0);
+  IsHighLatencySU.resize(SUnits.size(), 0);
+
+  for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
+    SUnit *SU = &SUnits[i];
+    unsigned BaseLatReg, OffLatReg;
+    if (SITII->isLowLatencyInstruction(SU->getInstr())) {
+      IsLowLatencySU[i] = 1;
+      if (SITII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseLatReg,
+                                       OffLatReg, TRI))
+        LowLatencyOffset[i] = OffLatReg;
+    } else if (SITII->isHighLatencyInstruction(SU->getInstr()))
+      IsHighLatencySU[i] = 1;
+  }
+
+  SIScheduler Scheduler(this);
+  Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
+                                   SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
+#if 0 // To enable when handleMove fix lands
+  // If VGPR usage is extremely high, try other well-performing variants,
+  // which could lead to lower VGPR usage.
+  if (Best.MaxVGPRUsage > 180) {
+    std::vector<std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant>> Variants = {
+      { LatenciesAlone, BlockRegUsageLatency },
+//      { LatenciesAlone, BlockRegUsage },
+      { LatenciesGrouped, BlockLatencyRegUsage },
+//      { LatenciesGrouped, BlockRegUsageLatency },
+//      { LatenciesGrouped, BlockRegUsage },
+      { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
+//      { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
+//      { LatenciesAlonePlusConsecutive, BlockRegUsage }
+    };
+    for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
+      Temp = Scheduler.scheduleVariant(v.first, v.second);
+      if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
+        Best = Temp;
+    }
+  }
+  // If VGPR usage is still extremely high, we may spill. Try other, lower
+  // performing variants that could lead to lower VGPR usage.
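+  // (Context, not in the original patch: these thresholds roughly track GCN
+  // occupancy. Each SIMD has a 256-entry VGPR file, so a wavefront using V
+  // VGPRs allows about floor(256 / V) waves in flight, ignoring allocation
+  // granularity. At 180 or 200 VGPRs only one wave fits, so latency hiding
+  // is already lost and the 256-VGPR spill limit is close.)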
+ if (Best.MaxVGPRUsage > 200) { + std::vector<std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant>> Variants = { +// { LatenciesAlone, BlockRegUsageLatency }, + { LatenciesAlone, BlockRegUsage }, +// { LatenciesGrouped, BlockLatencyRegUsage }, + { LatenciesGrouped, BlockRegUsageLatency }, + { LatenciesGrouped, BlockRegUsage }, +// { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage }, + { LatenciesAlonePlusConsecutive, BlockRegUsageLatency }, + { LatenciesAlonePlusConsecutive, BlockRegUsage } + }; + for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) { + Temp = Scheduler.scheduleVariant(v.first, v.second); + if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage) + Best = Temp; + } + } +#endif + ScheduledSUnits = Best.SUs; + ScheduledSUnitsInv.resize(SUnits.size()); + + for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) { + ScheduledSUnitsInv[ScheduledSUnits[i]] = i; + } + + moveLowLatencies(); + + // Tell the outside world about the result of the scheduling. + + assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); + TopRPTracker.setPos(CurrentTop); + + for (std::vector<unsigned>::iterator I = ScheduledSUnits.begin(), + E = ScheduledSUnits.end(); I != E; ++I) { + SUnit *SU = &SUnits[*I]; + + scheduleMI(SU, true); + + DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " + << *SU->getInstr()); + } + + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + + placeDebugValues(); + + DEBUG({ + unsigned BBNum = begin()->getParent()->getNumber(); + dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); +} diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h new file mode 100644 index 0000000..b270136 --- /dev/null +++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h @@ -0,0 +1,489 @@ +//===-- SIMachineScheduler.h - SI Scheduler Interface -*- C++ -*-------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief SI Machine Scheduler interface +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H +#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H + +#include "SIInstrInfo.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/RegisterPressure.h" + +using namespace llvm; + +namespace llvm { + +enum SIScheduleCandReason { + NoCand, + RegUsage, + Latency, + Successor, + Depth, + NodeOrder +}; + +struct SISchedulerCandidate { + // The reason for this candidate. + SIScheduleCandReason Reason; + + // Set of reasons that apply to multiple candidates. + uint32_t RepeatReasonSet; + + SISchedulerCandidate() + : Reason(NoCand), RepeatReasonSet(0) {} + + bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); } + void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); } +}; + +class SIScheduleDAGMI; +class SIScheduleBlockCreator; + +class SIScheduleBlock { + SIScheduleDAGMI *DAG; + SIScheduleBlockCreator *BC; + + std::vector<SUnit*> SUnits; + std::map<unsigned, unsigned> NodeNum2Index; + std::vector<SUnit*> TopReadySUs; + std::vector<SUnit*> ScheduledSUnits; + + /// The top of the unscheduled zone. 
+  IntervalPressure TopPressure;
+  RegPressureTracker TopRPTracker;
+
+  // Pressure: number of registers of a given class needed to hold
+  // the live virtual and physical registers.
+  // We only care about SGPR32 and VGPR32, and only track virtual registers.
+  // Pressure of additional registers required inside the block.
+  std::vector<unsigned> InternalAdditionnalPressure;
+  // Pressure of input and output registers.
+  std::vector<unsigned> LiveInPressure;
+  std::vector<unsigned> LiveOutPressure;
+  // Registers required by the block, and outputs.
+  // We only track virtual registers.
+  // Note that some registers are not 32 bits,
+  // and thus the pressure is not equal
+  // to the number of live registers.
+  std::set<unsigned> LiveInRegs;
+  std::set<unsigned> LiveOutRegs;
+
+  bool Scheduled;
+  bool HighLatencyBlock;
+
+  std::vector<unsigned> HasLowLatencyNonWaitedParent;
+
+  // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
+  unsigned ID;
+
+  std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
+  std::vector<SIScheduleBlock*> Succs;  // All blocks successors.
+  unsigned NumHighLatencySuccessors;
+
+public:
+  SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
+                  unsigned ID):
+    DAG(DAG), BC(BC), SUnits(), TopReadySUs(), ScheduledSUnits(),
+    TopRPTracker(TopPressure), Scheduled(false),
+    HighLatencyBlock(false), ID(ID),
+    Preds(), Succs(), NumHighLatencySuccessors(0) {};
+
+  ~SIScheduleBlock() {};
+
+  unsigned getID() const { return ID; }
+
+  /// Functions for Block construction.
+  void addUnit(SUnit *SU);
+
+  // When all SUs have been added.
+  void finalizeUnits();
+
+  // Add a block predecessor, i.e. a block containing an instruction
+  // predecessor of an SU in this block.
+  void addPred(SIScheduleBlock *Pred);
+  void addSucc(SIScheduleBlock *Succ);
+
+  const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
+  const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
+
+  unsigned Height;  // Maximum topdown path length to block without outputs
+  unsigned Depth;   // Maximum bottomup path length to block without inputs
+
+  unsigned getNumHighLatencySuccessors() const {
+    return NumHighLatencySuccessors;
+  }
+
+  bool isHighLatencyBlock() { return HighLatencyBlock; }
+
+  // This is approximate.
+  // Ideally it should take into account that some instructions (rcp, etc.)
+  // are 4 times slower.
+  int getCost() { return SUnits.size(); }
+
+  // All the block's predecessors and successors must be registered
+  // before fastSchedule().
+  // Fast schedule with no particular requirement.
+  void fastSchedule();
+
+  std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
+
+  // Complete schedule that tries to minimize register pressure and to hide
+  // the latency of low latency instructions; fills live-ins and live-outs.
+  // Needs all MIs to be grouped between BeginBlock and EndBlock.
+  // The MIs can be moved after the scheduling; the grouping is only
+  // needed to track the live registers correctly.
+  void schedule(MachineBasicBlock::iterator BeginBlock,
+                MachineBasicBlock::iterator EndBlock);
+
+  bool isScheduled() { return Scheduled; }
+
+  // Needs the block to have been scheduled first.
+  // TODO: find a way to compute it.
+  std::vector<unsigned> &getInternalAdditionnalRegUsage() {
+    return InternalAdditionnalPressure;
+  }
+
+  std::set<unsigned> &getInRegs() { return LiveInRegs; }
+  std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
+
+  void printDebug(bool Full);
+
+private:
+  struct SISchedCandidate : SISchedulerCandidate {
+    // The best SUnit candidate.
+ SUnit *SU; + + unsigned SGPRUsage; + unsigned VGPRUsage; + bool IsLowLatency; + unsigned LowLatencyOffset; + bool HasLowLatencyNonWaitedParent; + + SISchedCandidate() + : SU(nullptr) {} + + bool isValid() const { return SU; } + + // Copy the status of another candidate without changing policy. + void setBest(SISchedCandidate &Best) { + assert(Best.Reason != NoCand && "uninitialized Sched candidate"); + SU = Best.SU; + Reason = Best.Reason; + SGPRUsage = Best.SGPRUsage; + VGPRUsage = Best.VGPRUsage; + IsLowLatency = Best.IsLowLatency; + LowLatencyOffset = Best.LowLatencyOffset; + HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent; + } + }; + + void undoSchedule(); + + void undoReleaseSucc(SUnit *SU, SDep *SuccEdge); + void releaseSucc(SUnit *SU, SDep *SuccEdge); + // InOrOutBlock: restrict to links pointing inside the block (true), + // or restrict to links pointing outside the block (false). + void releaseSuccessors(SUnit *SU, bool InOrOutBlock); + + void nodeScheduled(SUnit *SU); + void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand); + void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand); + SUnit* pickNode(); + void traceCandidate(const SISchedCandidate &Cand); + void initRegPressure(MachineBasicBlock::iterator BeginBlock, + MachineBasicBlock::iterator EndBlock); +}; + +struct SIScheduleBlocks { + std::vector<SIScheduleBlock*> Blocks; + std::vector<int> TopDownIndex2Block; + std::vector<int> TopDownBlock2Index; +}; + +enum SISchedulerBlockCreatorVariant { + LatenciesAlone, + LatenciesGrouped, + LatenciesAlonePlusConsecutive +}; + +class SIScheduleBlockCreator { + SIScheduleDAGMI *DAG; + // unique_ptr handles freeing memory for us. + std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs; + std::map<SISchedulerBlockCreatorVariant, + SIScheduleBlocks> Blocks; + std::vector<SIScheduleBlock*> CurrentBlocks; + std::vector<int> Node2CurrentBlock; + + // Topological sort + // Maps topological index to the node number. + std::vector<int> TopDownIndex2Block; + std::vector<int> TopDownBlock2Index; + std::vector<int> BottomUpIndex2Block; + + // 0 -> Color not given. + // 1 to SUnits.size() -> Reserved group (you should only add elements to them). + // Above -> Other groups. + int NextReservedID; + int NextNonReservedID; + std::vector<int> CurrentColoring; + std::vector<int> CurrentTopDownReservedDependencyColoring; + std::vector<int> CurrentBottomUpReservedDependencyColoring; + +public: + SIScheduleBlockCreator(SIScheduleDAGMI *DAG); + ~SIScheduleBlockCreator(); + + SIScheduleBlocks + getBlocks(SISchedulerBlockCreatorVariant BlockVariant); + + bool isSUInBlock(SUnit *SU, unsigned ID); + +private: + // Give a Reserved color to every high latency. + void colorHighLatenciesAlone(); + + // Create groups of high latencies with a Reserved color. + void colorHighLatenciesGroups(); + + // Compute coloring for topdown and bottom traversals with + // different colors depending on dependencies on Reserved colors. + void colorComputeReservedDependencies(); + + // Give color to all non-colored SUs according to Reserved groups dependencies. + void colorAccordingToReservedDependencies(); + + // Divides Blocks having no bottom up or top down dependencies on Reserved groups. + // The new colors are computed according to the dependencies on the other blocks + // formed with colorAccordingToReservedDependencies. + void colorEndsAccordingToDependencies(); + + // Cut groups into groups with SUs in consecutive order (except for Reserved groups). 
+  void colorForceConsecutiveOrderInGroup();
+
+  // Merge constant loads whose users all belong to another group
+  // into that group.
+  // (TODO: else if all their users depend on the same group, put them there.)
+  void colorMergeConstantLoadsNextGroup();
+
+  // Merge SUs whose users all belong to another group into that group.
+  void colorMergeIfPossibleNextGroup();
+
+  // Merge SUs whose users all belong to another group into that group,
+  // but only for Reserved groups.
+  void colorMergeIfPossibleNextGroupOnlyForReserved();
+
+  // Merge SUs whose users all belong to another group into that group,
+  // but only if the group is no more than a few SUs.
+  void colorMergeIfPossibleSmallGroupsToNextGroup();
+
+  // Divide blocks that are too large.
+  // Implementation idea: attribute new colors depending on the topdown
+  // and bottom up links to other blocks.
+  void cutHugeBlocks();
+
+  // Put all instructions with no users in this scheduling region
+  // into one group (we want these groups to be at the end).
+  void regroupNoUserInstructions();
+
+  void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
+
+  void topologicalSort();
+
+  void scheduleInsideBlocks();
+
+  void fillStats();
+};
+
+enum SISchedulerBlockSchedulerVariant {
+  BlockLatencyRegUsage,
+  BlockRegUsageLatency,
+  BlockRegUsage
+};
+
+class SIScheduleBlockScheduler {
+  SIScheduleDAGMI *DAG;
+  SISchedulerBlockSchedulerVariant Variant;
+  std::vector<SIScheduleBlock*> Blocks;
+
+  std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
+  std::set<unsigned> LiveRegs;
+  // Number of schedulable unscheduled blocks reading the register.
+  std::map<unsigned, unsigned> LiveRegsConsumers;
+
+  std::vector<unsigned> LastPosHighLatencyParentScheduled;
+  int LastPosWaitedHighLatency;
+
+  std::vector<SIScheduleBlock*> BlocksScheduled;
+  unsigned NumBlockScheduled;
+  std::vector<SIScheduleBlock*> ReadyBlocks;
+
+  unsigned VregCurrentUsage;
+  unsigned SregCurrentUsage;
+
+  // Currently only an approximation.
+  unsigned maxVregUsage;
+  unsigned maxSregUsage;
+
+  std::vector<unsigned> BlockNumPredsLeft;
+  std::vector<unsigned> BlockNumSuccsLeft;
+
+public:
+  SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
+                           SISchedulerBlockSchedulerVariant Variant,
+                           SIScheduleBlocks BlocksStruct);
+  ~SIScheduleBlockScheduler() {};
+
+  std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; };
+
+  unsigned getVGPRUsage() { return maxVregUsage; };
+  unsigned getSGPRUsage() { return maxSregUsage; };
+
+private:
+  struct SIBlockSchedCandidate : SISchedulerCandidate {
+    // The best Block candidate.
+    SIScheduleBlock *Block;
+
+    bool IsHighLatency;
+    int VGPRUsageDiff;
+    unsigned NumSuccessors;
+    unsigned NumHighLatencySuccessors;
+    unsigned LastPosHighLatParentScheduled;
+    unsigned Height;
+
+    SIBlockSchedCandidate()
+      : Block(nullptr) {}
+
+    bool isValid() const { return Block; }
+
+    // Copy the status of another candidate without changing policy.
+ void setBest(SIBlockSchedCandidate &Best) { + assert(Best.Reason != NoCand && "uninitialized Sched candidate"); + Block = Best.Block; + Reason = Best.Reason; + IsHighLatency = Best.IsHighLatency; + VGPRUsageDiff = Best.VGPRUsageDiff; + NumSuccessors = Best.NumSuccessors; + NumHighLatencySuccessors = Best.NumHighLatencySuccessors; + LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled; + Height = Best.Height; + } + }; + + bool tryCandidateLatency(SIBlockSchedCandidate &Cand, + SIBlockSchedCandidate &TryCand); + bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand, + SIBlockSchedCandidate &TryCand); + SIScheduleBlock *pickBlock(); + + void addLiveRegs(std::set<unsigned> &Regs); + void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs); + void releaseBlockSuccs(SIScheduleBlock *Parent); + void blockScheduled(SIScheduleBlock *Block); + + // Check register pressure change + // by scheduling a block with these LiveIn and LiveOut. + std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs, + std::set<unsigned> &OutRegs); + + void schedule(); +}; + +struct SIScheduleBlockResult { + std::vector<unsigned> SUs; + unsigned MaxSGPRUsage; + unsigned MaxVGPRUsage; +}; + +class SIScheduler { + SIScheduleDAGMI *DAG; + SIScheduleBlockCreator BlockCreator; + +public: + SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}; + + ~SIScheduler() {}; + + struct SIScheduleBlockResult + scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, + SISchedulerBlockSchedulerVariant ScheduleVariant); +}; + +class SIScheduleDAGMI : public ScheduleDAGMILive { + const SIInstrInfo *SITII; + const SIRegisterInfo *SITRI; + + std::vector<SUnit> SUnitsLinksBackup; + + // For moveLowLatencies. After all Scheduling variants are tested. + std::vector<unsigned> ScheduledSUnits; + std::vector<unsigned> ScheduledSUnitsInv; + + unsigned VGPRSetID; + unsigned SGPRSetID; + +public: + SIScheduleDAGMI(MachineSchedContext *C); + + ~SIScheduleDAGMI() override; + + // Entry point for the schedule. + void schedule() override; + + // To init Block's RPTracker. + void initRPTracker(RegPressureTracker &RPTracker) { + RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); + } + + MachineBasicBlock *getBB() { return BB; } + MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }; + MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }; + LiveIntervals *getLIS() { return LIS; } + MachineRegisterInfo *getMRI() { return &MRI; } + const TargetRegisterInfo *getTRI() { return TRI; } + SUnit& getEntrySU() { return EntrySU; }; + SUnit& getExitSU() { return ExitSU; }; + + void restoreSULinksLeft(); + + template<typename _Iterator> void fillVgprSgprCost(_Iterator First, + _Iterator End, + unsigned &VgprUsage, + unsigned &SgprUsage); + std::set<unsigned> getInRegs() { + std::set<unsigned> InRegs (RPTracker.getPressure().LiveInRegs.begin(), + RPTracker.getPressure().LiveInRegs.end()); + return InRegs; + }; + + unsigned getVGPRSetID() const { return VGPRSetID; } + unsigned getSGPRSetID() const { return SGPRSetID; } + +private: + void topologicalSort(); + // After scheduling is done, improve low latency placements. + void moveLowLatencies(); + +public: + // Some stats for scheduling inside blocks. + std::vector<unsigned> IsLowLatencySU; + std::vector<unsigned> LowLatencyOffset; + std::vector<unsigned> IsHighLatencySU; + // Topological sort + // Maps topological index to the node number. 
+ std::vector<int> TopDownIndex2SU; + std::vector<int> BottomUpIndex2SU; +}; + +} // namespace llvm + +#endif /* SIMACHINESCHEDULER_H_ */ diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 2afa009..609f5e7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -23,7 +23,20 @@ using namespace llvm; -SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {} +SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() { + unsigned NumRegPressureSets = getNumRegPressureSets(); + + SGPR32SetID = NumRegPressureSets; + VGPR32SetID = NumRegPressureSets; + for (unsigned i = 0; i < NumRegPressureSets; ++i) { + if (strncmp("SGPR_32", getRegPressureSetName(i), 7) == 0) + SGPR32SetID = i; + else if (strncmp("VGPR_32", getRegPressureSetName(i), 7) == 0) + VGPR32SetID = i; + } + assert(SGPR32SetID < NumRegPressureSets && + VGPR32SetID < NumRegPressureSets); +} void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const { MCRegAliasIterator R(Reg, this, true); @@ -36,18 +49,15 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( const MachineFunction &MF) const { const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); if (ST.hasSGPRInitBug()) { - unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4; - if (ST.isXNACKEnabled()) - BaseIdx -= 4; - + // Leave space for flat_scr, xnack_mask, vcc, and alignment + unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 8 - 4; unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and - // 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down - // either way. + // 96/97 need to be reserved for flat_scr, 98/99 for xnack_mask, and + // 100/101 for vcc. This is the next sgpr128 down. return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95; } @@ -58,25 +68,14 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( const MachineFunction &MF) const { const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); if (ST.hasSGPRInitBug()) { - unsigned Idx; - - if (!ST.isXNACKEnabled()) - Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; - else - Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1; - + unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1; return AMDGPU::SGPR_32RegClass.getRegister(Idx); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - if (!ST.isXNACKEnabled()) { - // Next register before reservations for flat_scr and vcc. - return AMDGPU::SGPR97; - } else { - // Next register before reservations for flat_scr, xnack_mask, vcc, - // and scratch resource. - return AMDGPU::SGPR91; - } + // Next register before reservations for flat_scr, xnack_mask, vcc, + // and scratch resource. + return AMDGPU::SGPR91; } return AMDGPU::SGPR95; @@ -99,23 +98,22 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { // SI/CI have 104 SGPRs. VI has 102. We need to shift down the reservation - // for VCC/FLAT_SCR. + // for VCC/XNACK_MASK/FLAT_SCR. + // + // TODO The SGPRs that alias to XNACK_MASK could be used as general purpose + // SGPRs when the XNACK feature is not used. 
This is currently not done + // because the code that counts SGPRs cannot account for such holes. + reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97); reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99); reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101); - - if (ST.isXNACKEnabled()) - reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97); } // Tonga and Iceland can only allocate a fixed number of SGPRs due // to a hw bug. if (ST.hasSGPRInitBug()) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); - // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs). - unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4; - - if (ST.isXNACKEnabled()) - Limit -= 2; + // Reserve some SGPRs for FLAT_SCRATCH, XNACK_MASK, and VCC (6 SGPRs). + unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6; for (unsigned i = Limit; i < NumSGPRs; ++i) { unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); @@ -479,12 +477,38 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass( if (SubIdx == AMDGPU::NoSubRegister) return RC; - // If this register has a sub-register, we can safely assume it is a 32-bit - // register, because all of SI's sub-registers are 32-bit. + // We can assume that each lane corresponds to one 32-bit register. + unsigned Count = countPopulation(getSubRegIndexLaneMask(SubIdx)); if (isSGPRClass(RC)) { - return &AMDGPU::SGPR_32RegClass; + switch (Count) { + case 1: + return &AMDGPU::SGPR_32RegClass; + case 2: + return &AMDGPU::SReg_64RegClass; + case 4: + return &AMDGPU::SReg_128RegClass; + case 8: + return &AMDGPU::SReg_256RegClass; + case 16: /* fall-through */ + default: + llvm_unreachable("Invalid sub-register class size"); + } } else { - return &AMDGPU::VGPR_32RegClass; + switch (Count) { + case 1: + return &AMDGPU::VGPR_32RegClass; + case 2: + return &AMDGPU::VReg_64RegClass; + case 3: + return &AMDGPU::VReg_96RegClass; + case 4: + return &AMDGPU::VReg_128RegClass; + case 8: + return &AMDGPU::VReg_256RegClass; + case 16: /* fall-through */ + default: + llvm_unreachable("Invalid sub-register class size"); + } } } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 1795237..9410e20 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -25,6 +25,9 @@ namespace llvm { struct SIRegisterInfo : public AMDGPURegisterInfo { private: + unsigned SGPR32SetID; + unsigned VGPR32SetID; + void reserveRegisterTuples(BitVector &, unsigned Reg) const; public: @@ -146,6 +149,9 @@ public: unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC) const; + unsigned getSGPR32PressureSet() const { return SGPR32SetID; }; + unsigned getVGPR32PressureSet() const { return VGPR32SetID; }; + private: void buildScratchLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, unsigned Value, diff --git a/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp b/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp index dbdc76b..d36c5d2 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp @@ -98,6 +98,9 @@ void SITypeRewriter::visitCallInst(CallInst &I) { SmallVector <Type*, 8> Types; bool NeedToReplace = false; Function *F = I.getCalledFunction(); + if (!F) + return; + std::string Name = F->getName(); for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { Value *Arg = I.getArgOperand(i); diff --git 
a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index add415e..3b4c235 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -106,20 +106,27 @@ bool isReadOnlySegment(const GlobalValue *GV) {
   return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
 }
 
-static const char ShaderTypeAttribute[] = "ShaderType";
-
-unsigned getShaderType(const Function &F) {
-  Attribute A = F.getFnAttribute(ShaderTypeAttribute);
-  unsigned ShaderType = ShaderType::COMPUTE;
+static unsigned getIntegerAttribute(const Function &F, const char *Name,
+                                    unsigned Default) {
+  Attribute A = F.getFnAttribute(Name);
+  unsigned Result = Default;
 
   if (A.isStringAttribute()) {
     StringRef Str = A.getValueAsString();
-    if (Str.getAsInteger(0, ShaderType)) {
+    if (Str.getAsInteger(0, Result)) {
       LLVMContext &Ctx = F.getContext();
       Ctx.emitError("can't parse shader type");
     }
   }
-  return ShaderType;
+  return Result;
+}
+
+unsigned getShaderType(const Function &F) {
+  return getIntegerAttribute(F, "ShaderType", ShaderType::COMPUTE);
+}
+
+unsigned getInitialPSInputAddr(const Function &F) {
+  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
 }
 
 bool isSI(const MCSubtargetInfo &STI) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 19419a2..57cbe1b5 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -45,6 +45,8 @@ bool isGlobalSegment(const GlobalValue *GV);
 bool isReadOnlySegment(const GlobalValue *GV);
 
 unsigned getShaderType(const Function &F);
+unsigned getInitialPSInputAddr(const Function &F);
+
 bool isSI(const MCSubtargetInfo &STI);
 bool isCI(const MCSubtargetInfo &STI);
 
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 419717c..a520770 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -87,9 +87,22 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     }
   }
 
+  if (STI.isTargetDarwin() && F->getCallingConv() == CallingConv::CXX_FAST_TLS)
+    return MF->getInfo<ARMFunctionInfo>()->isSplitCSR()
+               ? CSR_iOS_CXX_TLS_PE_SaveList
+               : CSR_iOS_CXX_TLS_SaveList;
   return RegList;
 }
 
+const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy(
+    const MachineFunction *MF) const {
+  assert(MF && "Invalid MachineFunction pointer.");
+  if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+      MF->getInfo<ARMFunctionInfo>()->isSplitCSR())
+    return CSR_iOS_CXX_TLS_ViaCopy_SaveList;
+  return nullptr;
+}
+
 const uint32_t *
 ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                           CallingConv::ID CC) const {
@@ -97,6 +110,8 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
   if (CC == CallingConv::GHC)
     // This is academic because all GHC calls are (supposed to be) tail calls
    return CSR_NoRegs_RegMask;
+  if (STI.isTargetDarwin() && CC == CallingConv::CXX_FAST_TLS)
+    return CSR_iOS_CXX_TLS_RegMask;
  return STI.isTargetDarwin() ?
CSR_iOS_RegMask : CSR_AAPCS_RegMask; } @@ -106,6 +121,14 @@ ARMBaseRegisterInfo::getNoPreservedMask() const { } const uint32_t * +ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const { + assert(MF.getSubtarget<ARMSubtarget>().isTargetDarwin() && + "only know about special TLS call on Darwin"); + return CSR_iOS_TLSCall_RegMask; +} + + +const uint32_t * ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index cea8b80..6a9a45a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -62,6 +62,12 @@ static inline bool isARMArea3Register(unsigned Reg, bool isIOS) { switch (Reg) { case D15: case D14: case D13: case D12: case D11: case D10: case D9: case D8: + case D7: case D6: case D5: case D4: + case D3: case D2: case D1: case D0: + case D31: case D30: case D29: case D28: + case D27: case D26: case D25: case D24: + case D23: case D22: case D21: case D20: + case D19: case D18: case D17: case D16: return true; default: return false; @@ -92,9 +98,12 @@ protected: public: /// Code Generation virtual methods... const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const MCPhysReg * + getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const override; + const uint32_t *getTLSCallPreservedMask(const MachineFunction &MF) const; /// getThisReturnPreservedMask - Returns a call preserved mask specific to the /// case that 'returned' is on an i32 first argument if the calling convention diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td index 2335164..847ef87 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td @@ -225,6 +225,21 @@ def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS_ThisReturn, R9))>; +def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP, + (sequence "R%u", 12, 1), + (sequence "D%u", 31, 0))>; + +// C++ TLS access function saves all registers except SP. Try to match +// the order of CSRs in CSR_iOS. +def CSR_iOS_CXX_TLS : CalleeSavedRegs<(add CSR_iOS, (sequence "R%u", 12, 1), + (sequence "D%u", 31, 0))>; + +// CSRs that are handled by prologue, epilogue. +def CSR_iOS_CXX_TLS_PE : CalleeSavedRegs<(add LR)>; + +// CSRs that are handled explicitly via copies. +def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS, LR)>; + // The "interrupt" attribute is used to generate code that is acceptable in // exception-handlers of various kinds. It makes us use a different return // instruction (handled elsewhere) and affects which registers we must return to diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index 9bdf823c..ff2fcfa 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -578,7 +578,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { // For now 32-bit only. 
- if (VT != MVT::i32) return 0; + if (VT != MVT::i32 || GV->isThreadLocal()) return 0; Reloc::Model RelocM = TM.getRelocationModel(); bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM); @@ -2083,6 +2083,9 @@ bool ARMFastISel::SelectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; + if (TLI.supportSplitCSR(FuncInfo.MF)) + return false; + // Build a list of return value registers. SmallVector<unsigned, 4> RetRegs; diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 0242440..dfbb969 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -622,7 +622,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, } if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } else Base = N; @@ -801,7 +802,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); } else if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -1067,7 +1069,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); } else if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), @@ -1186,7 +1189,8 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, if (N.getOpcode() == ISD::ADD) { return false; // We want to select register offset instead } else if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } else { Base = N; @@ -1292,7 +1296,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, } if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::TargetConstantPool) return false; // We want to select t2LDRpci instead. diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index 9cfb06b..37c0795 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -744,7 +744,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SUBE, MVT::i32, Custom); } - if (!Subtarget->isThumb1Only()) + if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); // ARM does not have ROTL. 
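(The ARMISelDAGToDAG.cpp hunks above repeat the same two-opcode test in five
addressing-mode selectors. A possible cleanup, sketched here rather than part
of the patch, would be a small helper so future address kinds stay in sync:)

    // Sketch: true if N is a Wrapper whose operand may be taken directly as
    // the base, i.e. neither a plain global nor a TLS global (both of which
    // need extra lowering steps).
    static bool isFoldableWrapper(const SDValue &N) {
      if (N.getOpcode() != ARMISD::Wrapper)
        return false;
      unsigned Opc = N.getOperand(0).getOpcode();
      return Opc != ISD::TargetGlobalAddress &&
             Opc != ISD::TargetGlobalTLSAddress;
    }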
@@ -1385,6 +1385,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
     else
       return CallingConv::ARM_AAPCS;
   case CallingConv::Fast:
+  case CallingConv::CXX_FAST_TLS:
     if (!Subtarget->isAAPCS_ABI()) {
       if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
         return CallingConv::Fast;
@@ -2347,6 +2348,19 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
     Flag = Chain.getValue(1);
     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   }
+  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const MCPhysReg *I =
+      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+  if (I) {
+    for (; *I; ++I) {
+      if (ARM::GPRRegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::i32));
+      else if (ARM::DPRRegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+      else
+        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+    }
+  }
 
   // Update chain and glue.
   RetOps[0] = Chain;
@@ -2530,6 +2544,72 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
   return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
 }
 
+/// \brief Convert a TLS address reference into the correct sequence of loads
+/// and calls to compute the variable's address for Darwin, and return an
+/// SDValue containing the final node.
+
+/// Darwin only has one TLS scheme which must be capable of dealing with the
+/// fully general situation, in the worst case. This means:
+///     + "extern __thread" declaration.
+///     + Defined in a possibly unknown dynamic library.
+///
+/// The general system is that each __thread variable has a [3 x i32] descriptor
+/// which contains information used by the runtime to calculate the address. The
+/// only part of this the compiler needs to know about is the first word, which
+/// contains a function pointer that must be called with the address of the
+/// entire descriptor in "r0".
+///
+/// Since this descriptor may be in a different unit, in general access must
+/// proceed along the usual ARM rules. A common sequence to produce is:
+///
+///     movw rT1, :lower16:_var$non_lazy_ptr
+///     movt rT1, :upper16:_var$non_lazy_ptr
+///     ldr r0, [rT1]
+///     ldr rT2, [r0]
+///     blx rT2
+///     [...address now in r0...]
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
+  SDLoc DL(Op);
+
+  // First step is to get the address of the actual global symbol. This is where
+  // the TLS descriptor lives.
+  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
+
+  // The first entry in the descriptor is a function pointer that we must call
+  // to obtain the address of the variable.
+  SDValue Chain = DAG.getEntryNode();
+  SDValue FuncTLVGet =
+      DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
+                  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+                  false, true, true, 4);
+  Chain = FuncTLVGet.getValue(1);
+
+  MachineFunction &F = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = F.getFrameInfo();
+  MFI->setAdjustsStack(true);
+
+  // TLS calls preserve all registers except those that absolutely must be
+  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
+  // silly).
+  auto TRI =
+      getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
+  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
+  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
+
+  // Finally, we can make the call.
This is just a degenerate version of a + // normal AArch64 call node: r0 takes the address of the descriptor, and + // returns the address of the variable in this thread. + Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue()); + Chain = + DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), + Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32), + DAG.getRegisterMask(Mask), Chain.getValue(1)); + return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1)); +} + // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, @@ -2631,9 +2711,11 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SDValue ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget->isTargetDarwin()) + return LowerGlobalTLSAddressDarwin(Op, DAG); + // TODO: implement the "local dynamic" model - assert(Subtarget->isTargetELF() && - "TLS not implemented for non-ELF targets"); + assert(Subtarget->isTargetELF() && "Only ELF implemented here"); GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); if (DAG.getTarget().Options.EmulatedTLS) return LowerToTLSEmulatedModel(GA, DAG); @@ -11407,7 +11489,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; case 'J': - if (Subtarget->isThumb()) { // FIXME thumb2 + if (Subtarget->isThumb1Only()) { // This must be a constant between -255 and -1, for negated ADD // immediates. This can be used in GCC with an "n" modifier that // prints the negated value, for use with SUB instructions. It is @@ -11476,7 +11558,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; case 'M': - if (Subtarget->isThumb()) { // FIXME thumb2 + if (Subtarget->isThumb1Only()) { // This must be a multiple of 4 between 0 and 1020, for // ADD sp + immediate. if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) @@ -12324,3 +12406,49 @@ unsigned ARMTargetLowering::getExceptionSelectorRegister( // via the personality function. return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1; } + +void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { + // Update IsSplitCSR in ARMFunctionInfo. + ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>(); + AFI->setIsSplitCSR(true); +} + +void ARMTargetLowering::insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const { + const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); + const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); + if (!IStart) + return; + + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); + for (const MCPhysReg *I = IStart; *I; ++I) { + const TargetRegisterClass *RC = nullptr; + if (ARM::GPRRegClass.contains(*I)) + RC = &ARM::GPRRegClass; + else if (ARM::DPRRegClass.contains(*I)) + RC = &ARM::DPRRegClass; + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + + unsigned NewVR = MRI->createVirtualRegister(RC); + // Create copy from CSR to a virtual register. + // FIXME: this currently does not emit CFI pseudo-instructions, it works + // fine for CXX_FAST_TLS since the C++-style TLS access functions should be + // nounwind. If we want to generalize this later, we may need to emit + // CFI pseudo-instructions. 
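+  // (Context, not in the original patch: this entry/exit copying is what
+  // "split CSR" means: instead of prologue/epilogue spills, the callee-saved
+  // registers are copied into virtual registers in the entry block and
+  // copied back before every return. Clang gives CallingConv::CXX_FAST_TLS
+  // to Darwin's C++ thread_local wrapper and init functions, which are
+  // nounwind, so the FIXME above is tolerable for now.)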
+ assert(Entry->getParent()->getFunction()->hasFnAttribute( + Attribute::NoUnwind) && + "Function should be nounwind in insertCopiesSplitCSR!"); + Entry->addLiveIn(*I); + BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), + NewVR) + .addReg(*I); + + for (auto *Exit : Exits) + BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), + *I) + .addReg(NewVR); + } +} diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index b764624..96b56c3 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -526,6 +526,8 @@ namespace llvm { SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, SelectionDAG &DAG, TLSModel::Model model) const; + SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; @@ -578,6 +580,15 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals, bool isThisReturn, SDValue ThisVal) const; + bool supportSplitCSR(MachineFunction *MF) const override { + return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); + } + void initializeSplitCSR(MachineBasicBlock *Entry) const override; + void insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index b9de83b..c446ba3 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -5398,6 +5398,27 @@ def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), Requires<[IsARM, UseMovt]>; } // isReMaterializable +// The many different faces of TLS access. +def : ARMPat<(ARMWrapper tglobaltlsaddr :$dst), + (MOVi32imm tglobaltlsaddr :$dst)>, + Requires<[IsARM, UseMovt]>; + +def : Pat<(ARMWrapper tglobaltlsaddr:$src), + (LDRLIT_ga_abs tglobaltlsaddr:$src)>, + Requires<[IsARM, DontUseMovt]>; + +def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), + (MOV_ga_pcrel tglobaltlsaddr:$addr)>, Requires<[IsARM, UseMovt]>; + +def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), + (LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>, + Requires<[IsARM, DontUseMovt]>; +let AddedComplexity = 10 in +def : Pat<(load (ARMWrapperPIC tglobaltlsaddr:$addr)), + (MOV_ga_pcrel_ldr tglobaltlsaddr:$addr)>, + Requires<[IsARM, UseMovt]>; + + // ConstantPool, GlobalAddress, and JumpTable def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index 7020ffb..defef4e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -5689,7 +5689,10 @@ def : NEONInstAlias<"vmov${p} $Vd, $Vm", // VMOV : Vector Move (Immediate) -let isReMaterializable = 1 in { +// Although VMOVs are not strictly speaking cheap, they are as expensive +// as their copies counterpart (VORR), so we should prefer rematerialization +// over splitting when it applies. 
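(For context, a sketch of how the register allocator consumes the two flags
set below; the helper name and the exact TargetInstrInfo signatures are
assumptions for this era of the API, not taken from the patch:)

    // Remat is preferred over splitting/copying when both queries succeed.
    static bool preferRematOverCopy(const TargetInstrInfo &TII,
                                    const MachineInstr *MI, AliasAnalysis *AA) {
      return TII.isTriviallyReMaterializable(MI, AA) && // safe to recompute
             TII.isAsCheapAsAMove(MI);                  // no costlier than VORR
    }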
+let isReMaterializable = 1, isAsCheapAsAMove=1 in { def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd), (ins nImmSplatI8:$SIMM), IIC_VMOVImm, "vmov", "i8", "$Vd, $SIMM", "", @@ -5744,7 +5747,7 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, "vmov", "f32", "$Vd, $SIMM", "", [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; -} // isReMaterializable +} // isReMaterializable, isAsCheapAsAMove // Add support for bytes replication feature, so it could be GAS compatible. // E.g. instructions below: diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td index df6f243..5b1f9a0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -1366,6 +1366,14 @@ def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src), (ARMWrapper tglobaladdr:$src))]>, Requires<[IsThumb, DontUseMovt]>; +// TLS globals +def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), + (tLDRLIT_ga_pcrel tglobaltlsaddr:$addr)>, + Requires<[IsThumb, DontUseMovt]>; +def : Pat<(ARMWrapper tglobaltlsaddr:$addr), + (tLDRLIT_ga_abs tglobaltlsaddr:$addr)>, + Requires<[IsThumb, DontUseMovt]>; + // JumpTable def : T1Pat<(ARMWrapperJT tjumptable:$dst), diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index d460d33..f42f456 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -3875,6 +3875,13 @@ def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), } +def : T2Pat<(ARMWrapperPIC tglobaltlsaddr :$dst), + (t2MOV_ga_pcrel tglobaltlsaddr:$dst)>, + Requires<[IsThumb2, UseMovt]>; +def : T2Pat<(ARMWrapper tglobaltlsaddr:$dst), + (t2MOVi32imm tglobaltlsaddr:$dst)>, + Requires<[IsThumb2, UseMovt]>; + // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td index 050cd1a..63e7940 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -930,10 +930,10 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, // and could enable the conversion to float to be removed completely. 
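For context on the patterns the next hunk tightens: fabs and fneg on a double held in two GPRs are pure bit operations on the high word, which is why BFC (bit-field clear) and EOR can implement them without touching the FP pipeline. BFC only exists from ARMv6T2 on, hence the added HasV6T2 predicates. The bit manipulation itself, as a scalar sketch:

    #include <cstdint>

    // fabs: clear the sign bit of the high word (what BFC Rh, #31, #1 computes).
    uint32_t fabsHigh(uint32_t Rh) { return Rh & 0x7FFFFFFFu; }

    // fneg: flip the sign bit (what EOR Rh, Rh, #0x80000000 computes).
    uint32_t fnegHigh(uint32_t Rh) { return Rh ^ 0x80000000u; }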
def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)), (VMOVDRR GPR:$Rl, (BFC GPR:$Rh, (i32 0x7FFFFFFF)))>, - Requires<[IsARM]>; + Requires<[IsARM, HasV6T2]>; def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)), (VMOVDRR GPR:$Rl, (t2BFC GPR:$Rh, (i32 0x7FFFFFFF)))>, - Requires<[IsThumb2]>; + Requires<[IsThumb2, HasV6T2]>; def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)), (VMOVDRR GPR:$Rl, (EORri GPR:$Rh, (i32 0x80000000)))>, Requires<[IsARM]>; diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp index ac0330f..71ad7a4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -20,4 +20,5 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), - PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {} + PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), + IsSplitCSR(false) {} diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index d644797..68f9aec 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -118,6 +118,10 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// coalesced weights. DenseMap<const MachineBasicBlock*, unsigned> CoalescedWeights; + /// True if this function has a subset of CSRs that is handled explicitly via + /// copies. + bool IsSplitCSR; + public: ARMFunctionInfo() : isThumb(false), @@ -128,7 +132,7 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0), NumAlignedDPRCS2Regs(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false) {} + VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false) {} explicit ARMFunctionInfo(MachineFunction &MF); @@ -199,6 +203,9 @@ public: bool hasITBlocks() const { return HasITBlocks; } void setHasITBlocks(bool h) { HasITBlocks = h; } + bool isSplitCSR() const { return IsSplitCSR; } + void setIsSplitCSR(bool s) { IsSplitCSR = s; } + void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) { if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second) llvm_unreachable("Duplicate entries!"); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 6084f22..57577dc 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -388,6 +388,9 @@ private: size_t calculateContentSize() const; + // Reset state between object emissions + void reset() override; + public: ARMTargetELFStreamer(MCStreamer &S) : ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::FK_INVALID), @@ -415,7 +418,7 @@ public: MCCodeEmitter *Emitter, bool IsThumb) : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) { - Reset(); + EHReset(); } ~ARMELFStreamer() {} @@ -579,7 +582,10 @@ private: } // Helper functions for ARM exception handling directives - void Reset(); + void EHReset(); + + // Reset state between object emissions + void reset() override; void EmitPersonalityFixup(StringRef Name); void FlushPendingOffset(); @@ -1040,6 +1046,8 @@ void ARMTargetELFStreamer::emitInst(uint32_t Inst, char Suffix) 
{ getStreamer().emitInst(Inst, Suffix); } +void ARMTargetELFStreamer::reset() { AttributeSection = nullptr; } + void ARMELFStreamer::FinishImpl() { MCTargetStreamer &TS = *getTargetStreamer(); ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); @@ -1048,6 +1056,18 @@ void ARMELFStreamer::FinishImpl() { MCELFStreamer::FinishImpl(); } +void ARMELFStreamer::reset() { + MCTargetStreamer &TS = *getTargetStreamer(); + ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); + ATS.reset(); + MappingSymbolCounter = 0; + MCELFStreamer::reset(); + // MCELFStreamer clears the assembler's e_flags. However, for + // ARM we manually set the ABI version on streamer creation, so + // do the same here. + getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); +} + inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags, @@ -1094,7 +1114,7 @@ void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) { Kind)); } -void ARMELFStreamer::Reset() { +void ARMELFStreamer::EHReset() { ExTab = nullptr; FnStart = nullptr; Personality = nullptr; @@ -1164,7 +1184,7 @@ void ARMELFStreamer::emitFnEnd() { SwitchSection(&FnStart->getSection()); // Clean exception handling frame information - Reset(); + EHReset(); } void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index dad50f2..c0d10c8 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -38,6 +38,9 @@ void ARMTargetStreamer::emitCurrentConstantPool() { // finish() - write out any non-empty assembler constant pools. void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); } +// reset() - Reset any state. +void ARMTargetStreamer::reset() {} + // The remaining callbacks should be handled separately by each // streamer. void ARMTargetStreamer::emitFnStart() {} diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h index 812f983..27faac6 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h @@ -53,6 +53,11 @@ public: /// \p MBB will be correctly handled by the target. bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; + /// Disable shrink wrapping, as tBfar/BL will be used to adjust for long jumps. + bool enableShrinkWrapping(const MachineFunction &MF) const override { + return false; + } + private: /// Check if the frame lowering of \p MF needs a special fixup /// code sequence for the epilogue diff --git a/contrib/llvm/lib/Target/AVR/AVR.h b/contrib/llvm/lib/Target/AVR/AVR.h new file mode 100644 index 0000000..4c1667e --- /dev/null +++ b/contrib/llvm/lib/Target/AVR/AVR.h @@ -0,0 +1,54 @@ +//===-- AVR.h - Top-level interface for AVR representation ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// AVR back-end.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AVR_H +#define LLVM_AVR_H + +#include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" + +namespace llvm { + +class AVRTargetMachine; +class FunctionPass; + +FunctionPass *createAVRISelDag(AVRTargetMachine &TM, + CodeGenOpt::Level OptLevel); +FunctionPass *createAVRExpandPseudoPass(); +FunctionPass *createAVRFrameAnalyzerPass(); +FunctionPass *createAVRDynAllocaSRPass(); +FunctionPass *createAVRBranchSelectionPass(); + +/** + * Contains the AVR backend. + */ +namespace AVR { + +enum AddressSpace { DataMemory, ProgramMemory }; + +template <typename T> bool isProgramMemoryAddress(T *V) { + return cast<PointerType>(V->getType())->getAddressSpace() == ProgramMemory; +} + +inline bool isProgramMemoryAccess(MemSDNode const *N) { + auto V = N->getMemOperand()->getValue(); + + return (V != nullptr) ? isProgramMemoryAddress(V) : false; +} + +} // end of namespace AVR + +} // end namespace llvm + +#endif // LLVM_AVR_H diff --git a/contrib/llvm/lib/Target/AVR/AVRSelectionDAGInfo.h b/contrib/llvm/lib/Target/AVR/AVRSelectionDAGInfo.h new file mode 100644 index 0000000..ee832ad --- /dev/null +++ b/contrib/llvm/lib/Target/AVR/AVRSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- AVRSelectionDAGInfo.h - AVR SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the AVR subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AVR_SELECTION_DAG_INFO_H +#define LLVM_AVR_SELECTION_DAG_INFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { +/** + * Holds information about the AVR instruction selection DAG. + */ +class AVRSelectionDAGInfo : public TargetSelectionDAGInfo { +public: +}; + +} // end namespace llvm + +#endif // LLVM_AVR_SELECTION_DAG_INFO_H diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp new file mode 100644 index 0000000..85f03e8 --- /dev/null +++ b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp @@ -0,0 +1,40 @@ +//===-- AVRTargetObjectFile.cpp - AVR Object Files ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AVRTargetObjectFile.h" + +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Support/ELF.h" + +#include "AVR.h" + +namespace llvm { +void AVRTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { + Base::Initialize(Ctx, TM); + ProgmemDataSection = + Ctx.getELFSection(".progmem.data", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); +} + +MCSection * +AVRTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + // Global values in flash memory are placed in the progmem.data section + // unless they already have a user assigned section. 
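Spelled out, the test the hunk below performs when placing a global (a hedged sketch against the LLVM API of this era; goesInProgmemData is an illustrative name, not something this diff defines):

    #include "llvm/IR/GlobalValue.h"

    // A global belongs in .progmem.data when its pointer type lives in the
    // AVR program-memory address space and the user assigned no section.
    static bool goesInProgmemData(const llvm::GlobalValue *GV) {
      return GV->getType()->getPointerAddressSpace() == 1 /* ProgramMemory */ &&
             !GV->hasSection();
    }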
+ if (AVR::isProgramMemoryAddress(GV) && !GV->hasSection()) + return ProgmemDataSection; + + // Otherwise, we work the same way as ELF. + return Base::SelectSectionForGlobal(GV, Kind, Mang, TM); +} +} // end of namespace llvm diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h new file mode 100644 index 0000000..bdda35b --- /dev/null +++ b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h @@ -0,0 +1,35 @@ +//===-- AVRTargetObjectFile.h - AVR Object Info -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AVR_TARGET_OBJECT_FILE_H +#define LLVM_AVR_TARGET_OBJECT_FILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +namespace llvm { +/** + * Lowering for an AVR ELF32 object file. + */ +class AVRTargetObjectFile : public TargetLoweringObjectFileELF { + typedef TargetLoweringObjectFileELF Base; + +public: + void Initialize(MCContext &ctx, const TargetMachine &TM) override; + + MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const override; + +private: + MCSection *ProgmemDataSection; +}; + +} // end namespace llvm + +#endif // LLVM_AVR_TARGET_OBJECT_FILE_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp index e213089..4c7c039 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -190,9 +190,9 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI, - MCStreamer &OutStreamer, - const MCOperand &Imm, int AlignSize) { +static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI, + MCStreamer &OutStreamer, const MCOperand &Imm, + int AlignSize) { MCSymbol *Sym; int64_t Value; if (Imm.getExpr()->evaluateAsAbsolute(Value)) { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index 77907b0..4d2b545 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -1275,6 +1275,8 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, if (!BT.has(RD.Reg)) continue; const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); + auto At = MI->isPHI() ? B.getFirstNonPHI() + : MachineBasicBlock::iterator(MI); // Find a source operand that is equal to the result. 
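The `auto At = MI->isPHI() ? ...` computation introduced above, and repeated throughout the rest of this file, guards a MachineIR invariant: PHIs must form a contiguous run at the top of a block, so a COPY that replaces a PHI's value cannot legally be inserted at the PHI itself. Written once as a helper, the idiom reads (illustrative; the diff inlines the ternary at each site):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"

    // Pick a legal insertion point for an instruction replacing MI's value:
    // at MI for an ordinary instruction, after the PHI run otherwise.
    static llvm::MachineBasicBlock::iterator
    insertionPoint(llvm::MachineInstr *MI, llvm::MachineBasicBlock &B) {
      return MI->isPHI() ? B.getFirstNonPHI()
                         : llvm::MachineBasicBlock::iterator(MI);
    }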
for (auto &Op : MI->uses()) { @@ -1298,7 +1300,7 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, DebugLoc DL = MI->getDebugLoc(); const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); unsigned NewR = MRI.createVirtualRegister(FRC); - BuildMI(B, I, DL, HII.get(TargetOpcode::COPY), NewR) + BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) .addReg(RS.Reg, 0, RS.Sub); HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); BT.put(BitTracker::RegisterRef(NewR), SC); @@ -1925,7 +1927,9 @@ bool BitSimplification::genPackhl(MachineInstr *MI, MachineBasicBlock &B = *MI->getParent(); unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass); DebugLoc DL = MI->getDebugLoc(); - BuildMI(B, MI, DL, HII.get(Hexagon::S2_packhl), NewR) + auto At = MI->isPHI() ? B.getFirstNonPHI() + : MachineBasicBlock::iterator(MI); + BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR) .addReg(Rs.Reg, 0, Rs.Sub) .addReg(Rt.Reg, 0, Rt.Sub); HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); @@ -1950,9 +1954,11 @@ bool BitSimplification::genExtractHalf(MachineInstr *MI, // Prefer zxth, since zxth can go in any slot, while extractu only in // slots 2 and 3. unsigned NewR = 0; + auto At = MI->isPHI() ? B.getFirstNonPHI() + : MachineBasicBlock::iterator(MI); if (L.Low && Opc != Hexagon::A2_zxth) { NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - BuildMI(B, MI, DL, HII.get(Hexagon::A2_zxth), NewR) + BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR) .addReg(L.Reg, 0, L.Sub); } else if (!L.Low && Opc != Hexagon::S2_extractu) { NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); @@ -1989,7 +1995,9 @@ bool BitSimplification::genCombineHalf(MachineInstr *MI, MachineBasicBlock &B = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - BuildMI(B, MI, DL, HII.get(COpc), NewR) + auto At = MI->isPHI() ? B.getFirstNonPHI() + : MachineBasicBlock::iterator(MI); + BuildMI(B, At, DL, HII.get(COpc), NewR) .addReg(H.Reg, 0, H.Sub) .addReg(L.Reg, 0, L.Sub); HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); @@ -2043,7 +2051,9 @@ bool BitSimplification::genExtractLow(MachineInstr *MI, continue; unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - auto MIB = BuildMI(B, MI, DL, HII.get(NewOpc), NewR) + auto At = MI->isPHI() ? B.getFirstNonPHI() + : MachineBasicBlock::iterator(MI); + auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR) .addReg(RS.Reg, 0, RS.Sub); if (NewOpc == Hexagon::A2_andir) MIB.addImm((1 << W) - 1); @@ -2076,6 +2086,8 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI, if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI)) return false; MachineBasicBlock &B = *MI->getParent(); + auto At = MI->isPHI() ? B.getFirstNonPHI() + : MachineBasicBlock::iterator(MI); const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); const BitTracker::BitValue &V = SC[F+BN]; @@ -2098,7 +2110,7 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI, } if (P != UINT_MAX) { unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); - BuildMI(B, MI, DL, HII.get(Hexagon::S2_tstbit_i), NewR) + BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR) .addReg(RR.Reg, 0, RR.Sub) .addImm(P); HBS::replaceReg(RD.Reg, NewR, MRI); @@ -2108,7 +2120,7 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI, } else if (V.is(0) || V.is(1)) { unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); unsigned NewOpc = V.is(0) ? 
Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue; - BuildMI(B, MI, DL, HII.get(NewOpc), NewR); + BuildMI(B, At, DL, HII.get(NewOpc), NewR); HBS::replaceReg(RD.Reg, NewR, MRI); return true; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td index 87d6b35..37c2042 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -3320,6 +3320,7 @@ class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp, /* u16_0Imm */ addr{15-0}))); // Store upper-half and store doubleword cannot be NV. let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1)); + let Uses = !if (isAbs, [], [GP]); let IClass = 0b0100; let Inst{27} = 1; @@ -3425,6 +3426,7 @@ class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs> !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2}, !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1}, /* u16_0Imm */ addr{15-0}))); + let Uses = !if (isAbs, [], [GP]); let IClass = 0b0100; let Inst{27} = 1; @@ -3736,7 +3738,7 @@ defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; // if ([!]Pv[.new]) Rx=mem[bhwd](##global) //===----------------------------------------------------------------------===// -let isAsmParserOnly = 1 in +let isAsmParserOnly = 1, Uses = [GP] in class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp, bits<3> MajOp> : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp>, PredNewRel { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp new file mode 100644 index 0000000..06719cd --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp @@ -0,0 +1,60 @@ +//===--- HexagonRDF.cpp ---------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HexagonRDF.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" + +#include "llvm/CodeGen/MachineInstr.h" + +using namespace llvm; +using namespace rdf; + +bool HexagonRegisterAliasInfo::covers(RegisterRef RA, RegisterRef RB) const { + if (RA == RB) + return true; + + if (TargetRegisterInfo::isVirtualRegister(RA.Reg) && + TargetRegisterInfo::isVirtualRegister(RB.Reg)) { + // Hexagon-specific cases. + if (RA.Reg == RB.Reg) { + if (RA.Sub == 0) + return true; + if (RB.Sub == 0) + return false; + } + } + + return RegisterAliasInfo::covers(RA, RB); +} + +bool HexagonRegisterAliasInfo::covers(const RegisterSet &RRs, RegisterRef RR) + const { + if (RRs.count(RR)) + return true; + + if (!TargetRegisterInfo::isPhysicalRegister(RR.Reg)) { + assert(TargetRegisterInfo::isVirtualRegister(RR.Reg)); + // Check if both covering subregisters are present. + bool HasLo = RRs.count({RR.Reg, Hexagon::subreg_loreg}); + bool HasHi = RRs.count({RR.Reg, Hexagon::subreg_hireg}); + if (HasLo && HasHi) + return true; + } + + if (RR.Sub == 0) { + // Check if both covering subregisters are present. 
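The "covering" relation being refined here says when a set of register references accounts for all bits of another reference; the Hexagon-specific twist is that a 64-bit double register is covered by the pair of its 32-bit halves (subreg_loreg and subreg_hireg). A toy model with illustrative types:

    #include <set>
    #include <utility>

    using RegisterRef = std::pair<unsigned, unsigned>; // {register, subregister}
    enum : unsigned { WholeReg = 0, LoHalf = 1, HiHalf = 2 };

    // A whole register is covered directly, or by both of its halves.
    bool coversWhole(const std::set<RegisterRef> &RRs, unsigned Reg) {
      return RRs.count({Reg, WholeReg}) != 0 ||
             (RRs.count({Reg, LoHalf}) && RRs.count({Reg, HiHalf}));
    }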
+ unsigned Lo = TRI.getSubReg(RR.Reg, Hexagon::subreg_loreg); + unsigned Hi = TRI.getSubReg(RR.Reg, Hexagon::subreg_hireg); + if (RRs.count({Lo, 0}) && RRs.count({Hi, 0})) + return true; + } + + return RegisterAliasInfo::covers(RRs, RR); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h new file mode 100644 index 0000000..00c1889 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h @@ -0,0 +1,28 @@ +//===--- HexagonRDF.h -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGON_RDF_H +#define HEXAGON_RDF_H +#include "RDFGraph.h" + +namespace llvm { + class TargetRegisterInfo; +} + +namespace rdf { + struct HexagonRegisterAliasInfo : public RegisterAliasInfo { + HexagonRegisterAliasInfo(const TargetRegisterInfo &TRI) + : RegisterAliasInfo(TRI) {} + bool covers(RegisterRef RA, RegisterRef RR) const override; + bool covers(const RegisterSet &RRs, RegisterRef RR) const override; + }; +} + +#endif + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp new file mode 100644 index 0000000..3fcda984 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp @@ -0,0 +1,272 @@ +//===--- HexagonRDFOpt.cpp ------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HexagonInstrInfo.h" +#include "HexagonRDF.h" +#include "HexagonSubtarget.h" +#include "RDFCopy.h" +#include "RDFDeadCode.h" +#include "RDFGraph.h" +#include "RDFLiveness.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominanceFrontier.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Format.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; +using namespace rdf; + +namespace llvm { + void initializeHexagonRDFOptPass(PassRegistry&); + FunctionPass *createHexagonRDFOpt(); +} + +namespace { + cl::opt<unsigned> RDFLimit("rdf-limit", cl::init(UINT_MAX)); + unsigned RDFCount = 0; + cl::opt<bool> RDFDump("rdf-dump", cl::init(false)); + + class HexagonRDFOpt : public MachineFunctionPass { + public: + HexagonRDFOpt() : MachineFunctionPass(ID) { + initializeHexagonRDFOptPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineDominanceFrontier>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + const char *getPassName() const override { + return "Hexagon RDF optimizations"; + } + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; + + private: + MachineDominatorTree *MDT; + MachineRegisterInfo *MRI; + }; + + char HexagonRDFOpt::ID = 0; +} + +INITIALIZE_PASS_BEGIN(HexagonRDFOpt, "rdfopt", "Hexagon RDF opt", false, 
false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) +INITIALIZE_PASS_END(HexagonRDFOpt, "rdfopt", "Hexagon RDF opt", false, false) + + +struct HexagonDCE : public DeadCodeElimination { + HexagonDCE(DataFlowGraph &G, MachineRegisterInfo &MRI) + : DeadCodeElimination(G, MRI) {} + bool rewrite(NodeAddr<InstrNode*> IA, SetVector<NodeId> &Remove); + void removeOperand(NodeAddr<InstrNode*> IA, unsigned OpNum); + + bool run(); +}; + + +bool HexagonDCE::run() { + bool Collected = collect(); + if (!Collected) + return false; + + const SetVector<NodeId> &DeadNodes = getDeadNodes(); + const SetVector<NodeId> &DeadInstrs = getDeadInstrs(); + + typedef DenseMap<NodeId,NodeId> RefToInstrMap; + RefToInstrMap R2I; + SetVector<NodeId> PartlyDead; + DataFlowGraph &DFG = getDFG(); + + for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) { + for (auto TA : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Stmt>, DFG)) { + NodeAddr<StmtNode*> SA = TA; + for (NodeAddr<RefNode*> RA : SA.Addr->members(DFG)) { + R2I.insert(std::make_pair(RA.Id, SA.Id)); + if (DFG.IsDef(RA) && DeadNodes.count(RA.Id)) + if (!DeadInstrs.count(SA.Id)) + PartlyDead.insert(SA.Id); + } + } + } + + // Nodes to remove. + SetVector<NodeId> Remove = DeadInstrs; + + bool Changed = false; + for (NodeId N : PartlyDead) { + auto SA = DFG.addr<StmtNode*>(N); + if (trace()) + dbgs() << "Partly dead: " << *SA.Addr->getCode(); + Changed |= rewrite(SA, Remove); + } + + return erase(Remove) || Changed; +} + + +void HexagonDCE::removeOperand(NodeAddr<InstrNode*> IA, unsigned OpNum) { + MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); + + auto getOpNum = [MI] (MachineOperand &Op) -> unsigned { + for (unsigned i = 0, n = MI->getNumOperands(); i != n; ++i) + if (&MI->getOperand(i) == &Op) + return i; + llvm_unreachable("Invalid operand"); + }; + DenseMap<NodeId,unsigned> OpMap; + NodeList Refs = IA.Addr->members(getDFG()); + for (NodeAddr<RefNode*> RA : Refs) + OpMap.insert(std::make_pair(RA.Id, getOpNum(RA.Addr->getOp()))); + + MI->RemoveOperand(OpNum); + + for (NodeAddr<RefNode*> RA : Refs) { + unsigned N = OpMap[RA.Id]; + if (N < OpNum) + RA.Addr->setRegRef(&MI->getOperand(N)); + else if (N > OpNum) + RA.Addr->setRegRef(&MI->getOperand(N-1)); + } +} + + +bool HexagonDCE::rewrite(NodeAddr<InstrNode*> IA, SetVector<NodeId> &Remove) { + if (!getDFG().IsCode<NodeAttrs::Stmt>(IA)) + return false; + DataFlowGraph &DFG = getDFG(); + MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); + auto &HII = static_cast<const HexagonInstrInfo&>(DFG.getTII()); + if (HII.getAddrMode(MI) != HexagonII::PostInc) + return false; + unsigned Opc = MI->getOpcode(); + unsigned OpNum, NewOpc; + switch (Opc) { + case Hexagon::L2_loadri_pi: + NewOpc = Hexagon::L2_loadri_io; + OpNum = 1; + break; + case Hexagon::L2_loadrd_pi: + NewOpc = Hexagon::L2_loadrd_io; + OpNum = 1; + break; + case Hexagon::V6_vL32b_pi: + NewOpc = Hexagon::V6_vL32b_ai; + OpNum = 1; + break; + case Hexagon::S2_storeri_pi: + NewOpc = Hexagon::S2_storeri_io; + OpNum = 0; + break; + case Hexagon::S2_storerd_pi: + NewOpc = Hexagon::S2_storerd_io; + OpNum = 0; + break; + case Hexagon::V6_vS32b_pi: + NewOpc = Hexagon::V6_vS32b_ai; + OpNum = 0; + break; + default: + return false; + } + auto IsDead = [this] (NodeAddr<DefNode*> DA) -> bool { + return getDeadNodes().count(DA.Id); + }; + NodeList Defs; + MachineOperand &Op = MI->getOperand(OpNum); + for (NodeAddr<DefNode*> DA : IA.Addr->members_if(DFG.IsDef, DFG)) { + if (&DA.Addr->getOp() != 
&Op) + continue; + Defs = DFG.getRelatedRefs(IA, DA); + if (!std::all_of(Defs.begin(), Defs.end(), IsDead)) + return false; + break; + } + + // Mark all nodes in Defs for removal. + for (auto D : Defs) + Remove.insert(D.Id); + + if (trace()) + dbgs() << "Rewriting: " << *MI; + MI->setDesc(HII.get(NewOpc)); + MI->getOperand(OpNum+2).setImm(0); + removeOperand(IA, OpNum); + if (trace()) + dbgs() << " to: " << *MI; + + return true; +} + + +bool HexagonRDFOpt::runOnMachineFunction(MachineFunction &MF) { + if (RDFLimit.getPosition()) { + if (RDFCount >= RDFLimit) + return false; + RDFCount++; + } + + MDT = &getAnalysis<MachineDominatorTree>(); + const auto &MDF = getAnalysis<MachineDominanceFrontier>(); + const auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + const auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + MRI = &MF.getRegInfo(); + + HexagonRegisterAliasInfo HAI(HRI); + TargetOperandInfo TOI(HII); + + if (RDFDump) + MF.print(dbgs() << "Before " << getPassName() << "\n", nullptr); + DataFlowGraph G(MF, HII, HRI, *MDT, MDF, HAI, TOI); + G.build(); + if (RDFDump) { + dbgs() << PrintNode<FuncNode*>(G.getFunc(), G) << '\n'; + dbgs() << MF.getName() << '\n'; + } + + bool Changed; + CopyPropagation CP(G); + CP.trace(RDFDump); + Changed = CP.run(); + if (Changed) + G.build(); + + HexagonDCE DCE(G, *MRI); + DCE.trace(RDFDump); + Changed |= DCE.run(); + + if (Changed) { + Liveness LV(*MRI, G); + LV.trace(RDFDump); + LV.computeLiveIns(); + LV.resetLiveIns(); + LV.resetKills(); + } + + if (RDFDump) + MF.print(dbgs() << "After " << getPassName() << "\n", nullptr); + return false; +} + + +FunctionPass *llvm::createHexagonRDFOpt() { + return new HexagonRDFOpt(); +} + + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 61c0589..6e5f732 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -103,6 +103,8 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) Reserved.set(Hexagon::R30); Reserved.set(Hexagon::R31); Reserved.set(Hexagon::PC); + Reserved.set(Hexagon::GP); + Reserved.set(Hexagon::D14); Reserved.set(Hexagon::D15); Reserved.set(Hexagon::LC0); Reserved.set(Hexagon::LC1); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 9dccd69..34b03fb 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -26,7 +26,11 @@ using namespace llvm; -static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops", + +static cl::opt<bool> EnableRDFOpt("rdf-opt", cl::Hidden, cl::ZeroOrMore, + cl::init(true), cl::desc("Enable RDF-based optimizations")); + +static cl::opt<bool> DisableHardwareLoops("disable-hexagon-hwloops", cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", @@ -111,6 +115,7 @@ namespace llvm { FunctionPass *createHexagonOptimizeSZextends(); FunctionPass *createHexagonPacketizer(); FunctionPass *createHexagonPeephole(); + FunctionPass *createHexagonRDFOpt(); FunctionPass *createHexagonSplitConst32AndConst64(); FunctionPass *createHexagonSplitDoubleRegs(); FunctionPass *createHexagonStoreWidening(); @@ -262,9 +267,12 @@ void HexagonPassConfig::addPreRegAlloc() { } void HexagonPassConfig::addPostRegAlloc() { - if (getOptLevel() != 
CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { + if (EnableRDFOpt) + addPass(createHexagonRDFOpt()); if (!DisableHexagonCFGOpt) addPass(createHexagonCFGOptimizer(), false); + } } void HexagonPassConfig::addPreSched2() { diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index c2c6275..4b07ca7 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -334,21 +334,21 @@ static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI, // The only relocs left should be GP relative: default: if (MCID.mayStore() || MCID.mayLoad()) { - for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; - ++ImpUses) { - if (*ImpUses == Hexagon::GP) { - switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) { - case HexagonII::MemAccessSize::ByteAccess: - return fixup_Hexagon_GPREL16_0; - case HexagonII::MemAccessSize::HalfWordAccess: - return fixup_Hexagon_GPREL16_1; - case HexagonII::MemAccessSize::WordAccess: - return fixup_Hexagon_GPREL16_2; - case HexagonII::MemAccessSize::DoubleWordAccess: - return fixup_Hexagon_GPREL16_3; - default: - llvm_unreachable("unhandled fixup"); - } + for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); + ImpUses && *ImpUses; ++ImpUses) { + if (*ImpUses != Hexagon::GP) + continue; + switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) { + case HexagonII::MemAccessSize::ByteAccess: + return fixup_Hexagon_GPREL16_0; + case HexagonII::MemAccessSize::HalfWordAccess: + return fixup_Hexagon_GPREL16_1; + case HexagonII::MemAccessSize::WordAccess: + return fixup_Hexagon_GPREL16_2; + case HexagonII::MemAccessSize::DoubleWordAccess: + return fixup_Hexagon_GPREL16_3; + default: + llvm_unreachable("unhandled fixup"); } } } else diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index 6ceb848..4e1cce3 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -95,14 +95,7 @@ unsigned HexagonResource::setWeight(unsigned s) { return (Weight); } -HexagonCVIResource::TypeUnitsAndLanes *HexagonCVIResource::TUL; - -bool HexagonCVIResource::SetUp = HexagonCVIResource::setup(); - -bool HexagonCVIResource::setup() { - assert(!TUL); - TUL = new (TypeUnitsAndLanes); - +void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) { (*TUL)[HexagonII::TypeCVI_VA] = UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2); @@ -123,13 +116,12 @@ bool HexagonCVIResource::setup() { (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0); (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1); (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4); - - return true; } -HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, +HexagonCVIResource::HexagonCVIResource(TypeUnitsAndLanes *TUL, + MCInstrInfo const &MCII, unsigned s, MCInst const *id) - : HexagonResource(s) { + : HexagonResource(s), TUL(TUL) { unsigned T = HexagonMCInstrInfo::getType(MCII, *id); if (TUL->count(T)) { @@ -153,6 +145,7 @@ HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI) : MCII(MCII), STI(STI) { reset(); + HexagonCVIResource::SetupTUL(&TUL, 
STI.getCPU()); } void HexagonShuffler::reset() { @@ -163,7 +156,7 @@ void HexagonShuffler::reset() { void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender, unsigned S, bool X) { - HexagonInstr PI(MCII, ID, Extender, S, X); + HexagonInstr PI(&TUL, MCII, ID, Extender, S, X); Packet.push_back(PI); } diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h index 174f10f..a093f85 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" using namespace llvm; @@ -53,9 +54,11 @@ public: // HVX insn resources. class HexagonCVIResource : public HexagonResource { +public: typedef std::pair<unsigned, unsigned> UnitsAndLanes; typedef llvm::DenseMap<unsigned, UnitsAndLanes> TypeUnitsAndLanes; +private: // Available HVX slots. enum { CVI_NONE = 0, @@ -65,9 +68,7 @@ class HexagonCVIResource : public HexagonResource { CVI_MPY1 = 1 << 3 }; - static bool SetUp; - static bool setup(); - static TypeUnitsAndLanes *TUL; + TypeUnitsAndLanes *TUL; // Count of adjacent slots that the insn requires to be executed. unsigned Lanes; @@ -81,7 +82,9 @@ class HexagonCVIResource : public HexagonResource { void setStore(bool f = true) { Store = f; }; public: - HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, MCInst const *id); + HexagonCVIResource(TypeUnitsAndLanes *TUL, MCInstrInfo const &MCII, + unsigned s, MCInst const *id); + static void SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU); bool isValid() const { return (Valid); }; unsigned getLanes() const { return (Lanes); }; @@ -100,10 +103,11 @@ class HexagonInstr { bool SoloException; public: - HexagonInstr(MCInstrInfo const &MCII, MCInst const *id, + HexagonInstr(HexagonCVIResource::TypeUnitsAndLanes *T, + MCInstrInfo const &MCII, MCInst const *id, MCInst const *Extender, unsigned s, bool x = false) - : ID(id), Extender(Extender), Core(s), CVI(MCII, s, id), - SoloException(x){}; + : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id), + SoloException(x) {}; MCInst const *getDesc() const { return (ID); }; @@ -136,6 +140,8 @@ class HexagonShuffler { // Shuffling error code. unsigned Error; + HexagonCVIResource::TypeUnitsAndLanes TUL; + protected: int64_t BundleFlags; MCInstrInfo const &MCII; diff --git a/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp b/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp new file mode 100644 index 0000000..c547c71 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp @@ -0,0 +1,180 @@ +//===--- RDFCopy.cpp ------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Simplistic RDF-based copy propagation. 
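The transformation this file implements, in miniature: given %1 = COPY %0, a later use of %1 may be rewritten to %0 provided %0's reaching definition at the use is the same one the copy saw (the RDefMap below exists to check precisely that); once every use is rewritten, the COPY itself becomes dead. A toy linear-code version, with the reaching-def check reduced to invalidation on redefinition (illustrative only; the real pass works over the RDF graph and dominator tree):

    #include <iterator>
    #include <map>
    #include <string>
    #include <vector>

    struct Insn { std::string Def, Src; bool IsCopy; };

    void propagateCopies(std::vector<Insn> &Code) {
      std::map<std::string, std::string> CopyOf; // copy destination -> source
      for (Insn &I : Code) {
        auto F = CopyOf.find(I.Src);
        if (F != CopyOf.end())
          I.Src = F->second; // use the copy's source directly
        // A redefinition invalidates any copy that mentions the defined name.
        for (auto It = CopyOf.begin(); It != CopyOf.end();)
          It = (It->first == I.Def || It->second == I.Def) ? CopyOf.erase(It)
                                                           : std::next(It);
        if (I.IsCopy)
          CopyOf[I.Def] = I.Src;
      }
    }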
+ +#include "RDFCopy.h" +#include "RDFGraph.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Support/CommandLine.h" + +#include <atomic> + +#ifndef NDEBUG +static cl::opt<unsigned> CpLimit("rdf-cp-limit", cl::init(0), cl::Hidden); +static unsigned CpCount = 0; +#endif + +using namespace llvm; +using namespace rdf; + +void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, MachineInstr *MI) { + assert(MI->getOpcode() == TargetOpcode::COPY); + const MachineOperand &Op0 = MI->getOperand(0), &Op1 = MI->getOperand(1); + RegisterRef DstR = { Op0.getReg(), Op0.getSubReg() }; + RegisterRef SrcR = { Op1.getReg(), Op1.getSubReg() }; + auto FS = DefM.find(SrcR); + if (FS == DefM.end() || FS->second.empty()) + return; + Copies.push_back(SA.Id); + RDefMap[SrcR][SA.Id] = FS->second.top()->Id; + // Insert DstR into the map. + RDefMap[DstR]; +} + + +void CopyPropagation::updateMap(NodeAddr<InstrNode*> IA) { + RegisterSet RRs; + for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG)) + RRs.insert(RA.Addr->getRegRef()); + bool Common = false; + for (auto &R : RDefMap) { + if (!RRs.count(R.first)) + continue; + Common = true; + break; + } + if (!Common) + return; + + for (auto &R : RDefMap) { + if (!RRs.count(R.first)) + continue; + auto F = DefM.find(R.first); + if (F == DefM.end() || F->second.empty()) + continue; + R.second[IA.Id] = F->second.top()->Id; + } +} + + +bool CopyPropagation::scanBlock(MachineBasicBlock *B) { + bool Changed = false; + auto BA = DFG.getFunc().Addr->findBlock(B, DFG); + DFG.markBlock(BA.Id, DefM); + + for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) { + if (DFG.IsCode<NodeAttrs::Stmt>(IA)) { + NodeAddr<StmtNode*> SA = IA; + MachineInstr *MI = SA.Addr->getCode(); + if (MI->isCopy()) + recordCopy(SA, MI); + } + + updateMap(IA); + DFG.pushDefs(IA, DefM); + } + + MachineDomTreeNode *N = MDT.getNode(B); + for (auto I : *N) + Changed |= scanBlock(I->getBlock()); + + DFG.releaseBlock(BA.Id, DefM); + return Changed; +} + + +bool CopyPropagation::run() { + scanBlock(&DFG.getMF().front()); + + if (trace()) { + dbgs() << "Copies:\n"; + for (auto I : Copies) + dbgs() << *DFG.addr<StmtNode*>(I).Addr->getCode(); + dbgs() << "\nRDef map:\n"; + for (auto R : RDefMap) { + dbgs() << Print<RegisterRef>(R.first, DFG) << " -> {"; + for (auto &M : R.second) + dbgs() << ' ' << Print<NodeId>(M.first, DFG) << ':' + << Print<NodeId>(M.second, DFG); + dbgs() << " }\n"; + } + } + + bool Changed = false; + NodeSet Deleted; +#ifndef NDEBUG + bool HasLimit = CpLimit.getNumOccurrences() > 0; +#endif + + for (auto I : Copies) { +#ifndef NDEBUG + if (HasLimit && CpCount >= CpLimit) + break; +#endif + if (Deleted.count(I)) + continue; + auto SA = DFG.addr<InstrNode*>(I); + NodeList Ds = SA.Addr->members_if(DFG.IsDef, DFG); + if (Ds.size() != 1) + continue; + NodeAddr<DefNode*> DA = Ds[0]; + RegisterRef DR0 = DA.Addr->getRegRef(); + NodeList Us = SA.Addr->members_if(DFG.IsUse, DFG); + if (Us.size() != 1) + continue; + NodeAddr<UseNode*> UA0 = Us[0]; + RegisterRef UR0 = UA0.Addr->getRegRef(); + NodeId RD0 = UA0.Addr->getReachingDef(); + + for (NodeId N = DA.Addr->getReachedUse(), NextN; N; N = NextN) { + auto UA = DFG.addr<UseNode*>(N); + NextN = UA.Addr->getSibling(); + uint16_t F = UA.Addr->getFlags(); + if ((F & NodeAttrs::PhiRef) || (F & NodeAttrs::Fixed)) + continue; + if (UA.Addr->getRegRef() != DR0) + continue; + NodeAddr<InstrNode*> IA = UA.Addr->getOwner(DFG); + assert(DFG.IsCode<NodeAttrs::Stmt>(IA)); + 
MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); + if (RDefMap[UR0][IA.Id] != RD0) + continue; + MachineOperand &Op = UA.Addr->getOp(); + if (Op.isTied()) + continue; + if (trace()) { + dbgs() << "can replace " << Print<RegisterRef>(DR0, DFG) + << " with " << Print<RegisterRef>(UR0, DFG) << " in " + << *NodeAddr<StmtNode*>(IA).Addr->getCode(); + } + + Op.setReg(UR0.Reg); + Op.setSubReg(UR0.Sub); + Changed = true; +#ifndef NDEBUG + if (HasLimit && CpCount >= CpLimit) + break; + CpCount++; +#endif + + if (MI->isCopy()) { + MachineOperand &Op0 = MI->getOperand(0), &Op1 = MI->getOperand(1); + if (Op0.getReg() == Op1.getReg() && Op0.getSubReg() == Op1.getSubReg()) + MI->eraseFromParent(); + Deleted.insert(IA.Id); + } + } + } + + return Changed; +} + diff --git a/contrib/llvm/lib/Target/Hexagon/RDFCopy.h b/contrib/llvm/lib/Target/Hexagon/RDFCopy.h new file mode 100644 index 0000000..02531b9 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFCopy.h @@ -0,0 +1,48 @@ +//===--- RDFCopy.h --------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef RDF_COPY_H +#define RDF_COPY_H + +#include "RDFGraph.h" +#include <map> +#include <vector> + +namespace llvm { + class MachineBasicBlock; + class MachineDominatorTree; + class MachineInstr; +} + +namespace rdf { + struct CopyPropagation { + CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg), + Trace(false) {} + + bool run(); + void trace(bool On) { Trace = On; } + bool trace() const { return Trace; } + + private: + const MachineDominatorTree &MDT; + DataFlowGraph &DFG; + DataFlowGraph::DefStackMap DefM; + bool Trace; + + // map: register -> (map: stmt -> reaching def) + std::map<RegisterRef,std::map<NodeId,NodeId>> RDefMap; + std::vector<NodeId> Copies; + + void recordCopy(NodeAddr<StmtNode*> SA, MachineInstr *MI); + void updateMap(NodeAddr<InstrNode*> IA); + bool scanBlock(MachineBasicBlock *B); + }; +} + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp new file mode 100644 index 0000000..9566857 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp @@ -0,0 +1,204 @@ +//===--- RDFDeadCode.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// RDF-based generic dead code elimination. + +#include "RDFGraph.h" +#include "RDFLiveness.h" +#include "RDFDeadCode.h" + +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; +using namespace rdf; + +// Check if the given instruction has observable side-effects, i.e. if +// it should be considered "live". It is safe for this function to be +// overly conservative (i.e. return "true" for all instructions), but it +// is not safe to return "false" for an instruction that should not be +// considered removable. 
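The mark phase of this pass (collect(), further down) follows the textbook scheme: seed a worklist with the references of instructions that have observable effects, then repeatedly pull a node and enqueue what it depends on (a live def pulls in its instruction's uses; a live use pulls in its reaching defs) until nothing new is marked. The scheme, detached from RDF (illustrative):

    #include <set>
    #include <vector>

    // Dep[N] lists the nodes that node N depends on; Work holds the seeds.
    std::set<int> markLive(const std::vector<std::vector<int>> &Dep,
                           std::vector<int> Work) {
      std::set<int> Live;
      while (!Work.empty()) {
        int N = Work.back();
        Work.pop_back();
        if (!Live.insert(N).second)
          continue; // already marked live
        for (int M : Dep[N])
          Work.push_back(M);
      }
      return Live;
    }

Everything not marked at the fixpoint is dead.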
+bool DeadCodeElimination::isLiveInstr(const MachineInstr *MI) const { + if (MI->mayStore() || MI->isBranch() || MI->isCall() || MI->isReturn()) + return true; + if (MI->hasOrderedMemoryRef() || MI->hasUnmodeledSideEffects()) + return true; + if (MI->isPHI()) + return false; + for (auto &Op : MI->operands()) + if (Op.isReg() && MRI.isReserved(Op.getReg())) + return true; + return false; +} + +void DeadCodeElimination::scanInstr(NodeAddr<InstrNode*> IA, + SetVector<NodeId> &WorkQ) { + if (!DFG.IsCode<NodeAttrs::Stmt>(IA)) + return; + if (!isLiveInstr(NodeAddr<StmtNode*>(IA).Addr->getCode())) + return; + for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG)) { + if (!LiveNodes.count(RA.Id)) + WorkQ.insert(RA.Id); + } +} + +void DeadCodeElimination::processDef(NodeAddr<DefNode*> DA, + SetVector<NodeId> &WorkQ) { + NodeAddr<InstrNode*> IA = DA.Addr->getOwner(DFG); + for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) { + if (!LiveNodes.count(UA.Id)) + WorkQ.insert(UA.Id); + } + for (NodeAddr<DefNode*> TA : DFG.getRelatedRefs(IA, DA)) + LiveNodes.insert(TA.Id); +} + +void DeadCodeElimination::processUse(NodeAddr<UseNode*> UA, + SetVector<NodeId> &WorkQ) { + for (NodeAddr<DefNode*> DA : LV.getAllReachingDefs(UA)) { + if (!LiveNodes.count(DA.Id)) + WorkQ.insert(DA.Id); + } +} + +// Traverse the DFG and collect the set dead RefNodes and the set of +// dead instructions. Return "true" if any of these sets is non-empty, +// "false" otherwise. +bool DeadCodeElimination::collect() { + // This function works by first finding all live nodes. The dead nodes + // are then the complement of the set of live nodes. + // + // Assume that all nodes are dead. Identify instructions which must be + // considered live, i.e. instructions with observable side-effects, such + // as calls and stores. All arguments of such instructions are considered + // live. For each live def, all operands used in the corresponding + // instruction are considered live. For each live use, all its reaching + // defs are considered live. + LiveNodes.clear(); + SetVector<NodeId> WorkQ; + for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) + for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) + scanInstr(IA, WorkQ); + + while (!WorkQ.empty()) { + NodeId N = *WorkQ.begin(); + WorkQ.remove(N); + LiveNodes.insert(N); + auto RA = DFG.addr<RefNode*>(N); + if (DFG.IsDef(RA)) + processDef(RA, WorkQ); + else + processUse(RA, WorkQ); + } + + if (trace()) { + dbgs() << "Live nodes:\n"; + for (NodeId N : LiveNodes) { + auto RA = DFG.addr<RefNode*>(N); + dbgs() << PrintNode<RefNode*>(RA, DFG) << "\n"; + } + } + + auto IsDead = [this] (NodeAddr<InstrNode*> IA) -> bool { + for (NodeAddr<DefNode*> DA : IA.Addr->members_if(DFG.IsDef, DFG)) + if (LiveNodes.count(DA.Id)) + return false; + return true; + }; + + for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) { + for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) { + for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG)) + if (!LiveNodes.count(RA.Id)) + DeadNodes.insert(RA.Id); + if (DFG.IsCode<NodeAttrs::Stmt>(IA)) + if (isLiveInstr(NodeAddr<StmtNode*>(IA).Addr->getCode())) + continue; + if (IsDead(IA)) { + DeadInstrs.insert(IA.Id); + if (trace()) + dbgs() << "Dead instr: " << PrintNode<InstrNode*>(IA, DFG) << "\n"; + } + } + } + + return !DeadNodes.empty(); +} + +// Erase the nodes given in the Nodes set from DFG. 
In addition to removing +// them from the DFG, if a node corresponds to a statement, the corresponding +// machine instruction is erased from the function. +bool DeadCodeElimination::erase(const SetVector<NodeId> &Nodes) { + if (Nodes.empty()) + return false; + + // Prepare the actual set of ref nodes to remove: ref nodes from Nodes + // are included directly, for each InstrNode in Nodes, include the set + // of all RefNodes from it. + NodeList DRNs, DINs; + for (auto I : Nodes) { + auto BA = DFG.addr<NodeBase*>(I); + uint16_t Type = BA.Addr->getType(); + if (Type == NodeAttrs::Ref) { + DRNs.push_back(DFG.addr<RefNode*>(I)); + continue; + } + + // If it's a code node, add all ref nodes from it. + uint16_t Kind = BA.Addr->getKind(); + if (Kind == NodeAttrs::Stmt || Kind == NodeAttrs::Phi) { + for (auto N : NodeAddr<CodeNode*>(BA).Addr->members(DFG)) + DRNs.push_back(N); + DINs.push_back(DFG.addr<InstrNode*>(I)); + } else { + llvm_unreachable("Unexpected code node"); + return false; + } + } + + // Sort the list so that use nodes are removed first. This makes the + // "unlink" functions a bit faster. + auto UsesFirst = [] (NodeAddr<RefNode*> A, NodeAddr<RefNode*> B) -> bool { + uint16_t KindA = A.Addr->getKind(), KindB = B.Addr->getKind(); + if (KindA == NodeAttrs::Use && KindB == NodeAttrs::Def) + return true; + if (KindA == NodeAttrs::Def && KindB == NodeAttrs::Use) + return false; + return A.Id < B.Id; + }; + std::sort(DRNs.begin(), DRNs.end(), UsesFirst); + + if (trace()) + dbgs() << "Removing dead ref nodes:\n"; + for (NodeAddr<RefNode*> RA : DRNs) { + if (trace()) + dbgs() << " " << PrintNode<RefNode*>(RA, DFG) << '\n'; + if (DFG.IsUse(RA)) + DFG.unlinkUse(RA); + else if (DFG.IsDef(RA)) + DFG.unlinkDef(RA); + } + + // Now, remove all dead instruction nodes. + for (NodeAddr<InstrNode*> IA : DINs) { + NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG); + BA.Addr->removeMember(IA, DFG); + if (!DFG.IsCode<NodeAttrs::Stmt>(IA)) + continue; + + MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); + if (trace()) + dbgs() << "erasing: " << *MI; + MI->eraseFromParent(); + } + return true; +} diff --git a/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.h b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.h new file mode 100644 index 0000000..f4373fb --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.h @@ -0,0 +1,65 @@ +//===--- RDFDeadCode.h ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// RDF-based generic dead code elimination. +// +// The main interface of this class are functions "collect" and "erase". +// This allows custom processing of the function being optimized by a +// particular consumer. The simplest way to use this class would be to +// instantiate an object, and then simply call "collect" and "erase", +// passing the result of "getDeadInstrs()" to it. +// A more complex scenario would be to call "collect" first, then visit +// all post-increment instructions to see if the address update is dead +// or not, and if it is, convert the instruction to a non-updating form. +// After that "erase" can be called with the set of nodes including both, +// dead defs from the updating instructions and the nodes corresponding +// to the dead instructions. 
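The simple protocol described above, written out against the interface declared below (a hedged usage sketch; the HexagonDCE subclass earlier in this commit is an instance of the more complex scenario):

    #include "RDFDeadCode.h"

    // Collect dead nodes, then erase every dead instruction in one shot.
    bool runSimpleDCE(rdf::DataFlowGraph &G, llvm::MachineRegisterInfo &MRI) {
      rdf::DeadCodeElimination DCE(G, MRI);
      if (!DCE.collect())
        return false;
      return DCE.erase(DCE.getDeadInstrs());
    }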
+ +#ifndef RDF_DEADCODE_H +#define RDF_DEADCODE_H + +#include "RDFGraph.h" +#include "RDFLiveness.h" +#include "llvm/ADT/SetVector.h" + +namespace llvm { + class MachineRegisterInfo; +} + +namespace rdf { + struct DeadCodeElimination { + DeadCodeElimination(DataFlowGraph &dfg, MachineRegisterInfo &mri) + : Trace(false), DFG(dfg), MRI(mri), LV(mri, dfg) {} + + bool collect(); + bool erase(const SetVector<NodeId> &Nodes); + void trace(bool On) { Trace = On; } + bool trace() const { return Trace; } + + SetVector<NodeId> getDeadNodes() { return DeadNodes; } + SetVector<NodeId> getDeadInstrs() { return DeadInstrs; } + DataFlowGraph &getDFG() { return DFG; } + + private: + bool Trace; + SetVector<NodeId> LiveNodes; + SetVector<NodeId> DeadNodes; + SetVector<NodeId> DeadInstrs; + DataFlowGraph &DFG; + MachineRegisterInfo &MRI; + Liveness LV; + + bool isLiveInstr(const MachineInstr *MI) const; + void scanInstr(NodeAddr<InstrNode*> IA, SetVector<NodeId> &WorkQ); + void processDef(NodeAddr<DefNode*> DA, SetVector<NodeId> &WorkQ); + void processUse(NodeAddr<UseNode*> UA, SetVector<NodeId> &WorkQ); + }; +} + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp new file mode 100644 index 0000000..9b47422 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp @@ -0,0 +1,1716 @@ +//===--- RDFGraph.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Target-independent, SSA-based data flow graph for register data flow (RDF). +// +#include "RDFGraph.h" + +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominanceFrontier.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; +using namespace rdf; + +// Printing functions. Have them here first, so that the rest of the code +// can use them. 
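One idiom is worth noting before the wall of printers: a Print<T> wrapper pairs the object being printed with the graph, so operator<< can resolve context-dependent names (register names through TRI, opcode names through TII) without global state. The pattern in miniature, independent of RDF (illustrative):

    #include <iostream>

    struct Context { const char *Names[2] = {"r0", "r1"}; };

    // Pair a value with the context needed to render it.
    template <typename T> struct Print {
      const T &Obj;
      const Context &G;
    };

    std::ostream &operator<<(std::ostream &OS, const Print<unsigned> &P) {
      return OS << P.G.Names[P.Obj]; // context-dependent rendering
    }

    int main() {
      Context C;
      std::cout << Print<unsigned>{1u, C} << '\n'; // prints "r1"
    }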
+namespace rdf { + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) { + auto &TRI = P.G.getTRI(); + if (P.Obj.Reg > 0 && P.Obj.Reg < TRI.getNumRegs()) + OS << TRI.getName(P.Obj.Reg); + else + OS << '#' << P.Obj.Reg; + if (P.Obj.Sub > 0) { + OS << ':'; + if (P.Obj.Sub < TRI.getNumSubRegIndices()) + OS << TRI.getSubRegIndexName(P.Obj.Sub); + else + OS << '#' << P.Obj.Sub; + } + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) { + auto NA = P.G.addr<NodeBase*>(P.Obj); + uint16_t Attrs = NA.Addr->getAttrs(); + uint16_t Kind = NodeAttrs::kind(Attrs); + uint16_t Flags = NodeAttrs::flags(Attrs); + switch (NodeAttrs::type(Attrs)) { + case NodeAttrs::Code: + switch (Kind) { + case NodeAttrs::Func: OS << 'f'; break; + case NodeAttrs::Block: OS << 'b'; break; + case NodeAttrs::Stmt: OS << 's'; break; + case NodeAttrs::Phi: OS << 'p'; break; + default: OS << "c?"; break; + } + break; + case NodeAttrs::Ref: + if (Flags & NodeAttrs::Preserving) + OS << '+'; + if (Flags & NodeAttrs::Clobbering) + OS << '~'; + switch (Kind) { + case NodeAttrs::Use: OS << 'u'; break; + case NodeAttrs::Def: OS << 'd'; break; + case NodeAttrs::Block: OS << 'b'; break; + default: OS << "r?"; break; + } + break; + default: + OS << '?'; + break; + } + OS << P.Obj; + if (Flags & NodeAttrs::Shadow) + OS << '"'; + return OS; +} + +namespace { + void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA, + const DataFlowGraph &G) { + OS << Print<NodeId>(RA.Id, G) << '<' + << Print<RegisterRef>(RA.Addr->getRegRef(), G) << '>'; + if (RA.Addr->getFlags() & NodeAttrs::Fixed) + OS << '!'; + } +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) { + printRefHeader(OS, P.Obj, P.G); + OS << '('; + if (NodeId N = P.Obj.Addr->getReachingDef()) + OS << Print<NodeId>(N, P.G); + OS << ','; + if (NodeId N = P.Obj.Addr->getReachedDef()) + OS << Print<NodeId>(N, P.G); + OS << ','; + if (NodeId N = P.Obj.Addr->getReachedUse()) + OS << Print<NodeId>(N, P.G); + OS << "):"; + if (NodeId N = P.Obj.Addr->getSibling()) + OS << Print<NodeId>(N, P.G); + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) { + printRefHeader(OS, P.Obj, P.G); + OS << '('; + if (NodeId N = P.Obj.Addr->getReachingDef()) + OS << Print<NodeId>(N, P.G); + OS << "):"; + if (NodeId N = P.Obj.Addr->getSibling()) + OS << Print<NodeId>(N, P.G); + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, + const Print<NodeAddr<PhiUseNode*>> &P) { + printRefHeader(OS, P.Obj, P.G); + OS << '('; + if (NodeId N = P.Obj.Addr->getReachingDef()) + OS << Print<NodeId>(N, P.G); + OS << ','; + if (NodeId N = P.Obj.Addr->getPredecessor()) + OS << Print<NodeId>(N, P.G); + OS << "):"; + if (NodeId N = P.Obj.Addr->getSibling()) + OS << Print<NodeId>(N, P.G); + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<RefNode*>> &P) { + switch (P.Obj.Addr->getKind()) { + case NodeAttrs::Def: + OS << PrintNode<DefNode*>(P.Obj, P.G); + break; + case NodeAttrs::Use: + if (P.Obj.Addr->getFlags() & NodeAttrs::PhiRef) + OS << PrintNode<PhiUseNode*>(P.Obj, P.G); + else + OS << PrintNode<UseNode*>(P.Obj, P.G); + break; + } + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) { + unsigned N = P.Obj.size(); + for (auto I : P.Obj) { + OS << Print<NodeId>(I.Id, P.G); + if (--N) + OS << ' '; + } + return OS; +} + +template<> +raw_ostream 
&operator<< (raw_ostream &OS, const Print<NodeSet> &P) { + unsigned N = P.Obj.size(); + for (auto I : P.Obj) { + OS << Print<NodeId>(I, P.G); + if (--N) + OS << ' '; + } + return OS; +} + +namespace { + template <typename T> + struct PrintListV { + PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {} + typedef T Type; + const NodeList &List; + const DataFlowGraph &G; + }; + + template <typename T> + raw_ostream &operator<< (raw_ostream &OS, const PrintListV<T> &P) { + unsigned N = P.List.size(); + for (NodeAddr<T> A : P.List) { + OS << PrintNode<T>(A, P.G); + if (--N) + OS << ", "; + } + return OS; + } +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) { + OS << Print<NodeId>(P.Obj.Id, P.G) << ": phi [" + << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']'; + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, + const Print<NodeAddr<StmtNode*>> &P) { + unsigned Opc = P.Obj.Addr->getCode()->getOpcode(); + OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc) + << " [" << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']'; + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, + const Print<NodeAddr<InstrNode*>> &P) { + switch (P.Obj.Addr->getKind()) { + case NodeAttrs::Phi: + OS << PrintNode<PhiNode*>(P.Obj, P.G); + break; + case NodeAttrs::Stmt: + OS << PrintNode<StmtNode*>(P.Obj, P.G); + break; + default: + OS << "instr? " << Print<NodeId>(P.Obj.Id, P.G); + break; + } + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, + const Print<NodeAddr<BlockNode*>> &P) { + auto *BB = P.Obj.Addr->getCode(); + unsigned NP = BB->pred_size(); + std::vector<int> Ns; + auto PrintBBs = [&OS,&P] (std::vector<int> Ns) -> void { + unsigned N = Ns.size(); + for (auto I : Ns) { + OS << "BB#" << I; + if (--N) + OS << ", "; + } + }; + + OS << Print<NodeId>(P.Obj.Id, P.G) << ": === BB#" << BB->getNumber() + << " === preds(" << NP << "): "; + for (auto I : BB->predecessors()) + Ns.push_back(I->getNumber()); + PrintBBs(Ns); + + unsigned NS = BB->succ_size(); + OS << " succs(" << NS << "): "; + Ns.clear(); + for (auto I : BB->successors()) + Ns.push_back(I->getNumber()); + PrintBBs(Ns); + OS << '\n'; + + for (auto I : P.Obj.Addr->members(P.G)) + OS << PrintNode<InstrNode*>(I, P.G) << '\n'; + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, + const Print<NodeAddr<FuncNode*>> &P) { + OS << "DFG dump:[\n" << Print<NodeId>(P.Obj.Id, P.G) << ": Function: " + << P.Obj.Addr->getCode()->getName() << '\n'; + for (auto I : P.Obj.Addr->members(P.G)) + OS << PrintNode<BlockNode*>(I, P.G) << '\n'; + OS << "]\n"; + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) { + OS << '{'; + for (auto I : P.Obj) + OS << ' ' << Print<RegisterRef>(I, P.G); + OS << " }"; + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, + const Print<DataFlowGraph::DefStack> &P) { + for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E; ) { + OS << Print<NodeId>(I->Id, P.G) + << '<' << Print<RegisterRef>(I->Addr->getRegRef(), P.G) << '>'; + I.down(); + if (I != E) + OS << ' '; + } + return OS; +} + +} // namespace rdf + +// Node allocation functions. +// +// Node allocator is like a slab memory allocator: it allocates blocks of +// memory in sizes that are multiples of the size of a node. Each block has +// the same size. 
Nodes are allocated from the currently active block, and
+// when it becomes full, a new one is created.
+// The mapping scheme between a node id and its location within a block is
+// described in the header file.
+//
+void NodeAllocator::startNewBlock() {
+  void *T = MemPool.Allocate(NodesPerBlock*NodeMemSize, NodeMemSize);
+  char *P = static_cast<char*>(T);
+  Blocks.push_back(P);
+  // Check if the block index is still within the allowed range, i.e. less
+  // than 2^N, where N is the number of bits in NodeId for the block index.
+  // BitsPerIndex is the number of bits per node index.
+  assert((Blocks.size() < (1U << (8*sizeof(NodeId)-BitsPerIndex))) &&
+         "Out of bits for block index");
+  ActiveEnd = P;
+}
+
+bool NodeAllocator::needNewBlock() {
+  if (Blocks.empty())
+    return true;
+
+  char *ActiveBegin = Blocks.back();
+  uint32_t Index = (ActiveEnd-ActiveBegin)/NodeMemSize;
+  return Index >= NodesPerBlock;
+}
+
+NodeAddr<NodeBase*> NodeAllocator::New() {
+  if (needNewBlock())
+    startNewBlock();
+
+  uint32_t ActiveB = Blocks.size()-1;
+  uint32_t Index = (ActiveEnd - Blocks[ActiveB])/NodeMemSize;
+  NodeAddr<NodeBase*> NA = { reinterpret_cast<NodeBase*>(ActiveEnd),
+                             makeId(ActiveB, Index) };
+  ActiveEnd += NodeMemSize;
+  return NA;
+}
+
+NodeId NodeAllocator::id(const NodeBase *P) const {
+  uintptr_t A = reinterpret_cast<uintptr_t>(P);
+  for (unsigned i = 0, n = Blocks.size(); i != n; ++i) {
+    uintptr_t B = reinterpret_cast<uintptr_t>(Blocks[i]);
+    if (A < B || A >= B + NodesPerBlock*NodeMemSize)
+      continue;
+    uint32_t Idx = (A-B)/NodeMemSize;
+    return makeId(i, Idx);
+  }
+  llvm_unreachable("Invalid node address");
+}
+
+void NodeAllocator::clear() {
+  MemPool.Reset();
+  Blocks.clear();
+  ActiveEnd = nullptr;
+}
+
+
+// Insert node NA after "this" in the circular chain.
+void NodeBase::append(NodeAddr<NodeBase*> NA) {
+  NodeId Nx = Next;
+  // If NA is already "next", do nothing.
+  if (Next != NA.Id) {
+    Next = NA.Id;
+    NA.Addr->Next = Nx;
+  }
+}
+
+
+// Fundamental node manipulator functions.
+
+// Obtain the register reference from a reference node.
+RegisterRef RefNode::getRegRef() const {
+  assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+  if (NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef)
+    return Ref.RR;
+  assert(Ref.Op != nullptr);
+  return { Ref.Op->getReg(), Ref.Op->getSubReg() };
+}
+
+// Set the register reference in the reference node directly (for references
+// in phi nodes).
+void RefNode::setRegRef(RegisterRef RR) {
+  assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+  assert(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef);
+  Ref.RR = RR;
+}
+
+// Set the register reference in the reference node based on a machine
+// operand (for references in statement nodes).
+void RefNode::setRegRef(MachineOperand *Op) {
+  assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+  assert(!(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef));
+  Ref.Op = Op;
+}
+
+// Get the owner of a given reference node.
+NodeAddr<NodeBase*> RefNode::getOwner(const DataFlowGraph &G) {
+  NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext());
+
+  while (NA.Addr != this) {
+    if (NA.Addr->getType() == NodeAttrs::Code)
+      return NA;
+    NA = G.addr<NodeBase*>(NA.Addr->getNext());
+  }
+  llvm_unreachable("No owner in circular list");
+}
+
+// Connect the def node to the reaching def node.
+void DefNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) {
+  Ref.RD = DA.Id;
+  Ref.Sib = DA.Addr->getReachedDef();
+  DA.Addr->setReachedDef(Self);
+}
+
+// Connect the use node to the reaching def node.
+void UseNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) { + Ref.RD = DA.Id; + Ref.Sib = DA.Addr->getReachedUse(); + DA.Addr->setReachedUse(Self); +} + +// Get the first member of the code node. +NodeAddr<NodeBase*> CodeNode::getFirstMember(const DataFlowGraph &G) const { + if (Code.FirstM == 0) + return NodeAddr<NodeBase*>(); + return G.addr<NodeBase*>(Code.FirstM); +} + +// Get the last member of the code node. +NodeAddr<NodeBase*> CodeNode::getLastMember(const DataFlowGraph &G) const { + if (Code.LastM == 0) + return NodeAddr<NodeBase*>(); + return G.addr<NodeBase*>(Code.LastM); +} + +// Add node NA at the end of the member list of the given code node. +void CodeNode::addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) { + auto ML = getLastMember(G); + if (ML.Id != 0) { + ML.Addr->append(NA); + } else { + Code.FirstM = NA.Id; + NodeId Self = G.id(this); + NA.Addr->setNext(Self); + } + Code.LastM = NA.Id; +} + +// Add node NA after member node MA in the given code node. +void CodeNode::addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA, + const DataFlowGraph &G) { + MA.Addr->append(NA); + if (Code.LastM == MA.Id) + Code.LastM = NA.Id; +} + +// Remove member node NA from the given code node. +void CodeNode::removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) { + auto MA = getFirstMember(G); + assert(MA.Id != 0); + + // Special handling if the member to remove is the first member. + if (MA.Id == NA.Id) { + if (Code.LastM == MA.Id) { + // If it is the only member, set both first and last to 0. + Code.FirstM = Code.LastM = 0; + } else { + // Otherwise, advance the first member. + Code.FirstM = MA.Addr->getNext(); + } + return; + } + + while (MA.Addr != this) { + NodeId MX = MA.Addr->getNext(); + if (MX == NA.Id) { + MA.Addr->setNext(NA.Addr->getNext()); + // If the member to remove happens to be the last one, update the + // LastM indicator. + if (Code.LastM == NA.Id) + Code.LastM = MA.Id; + return; + } + MA = G.addr<NodeBase*>(MX); + } + llvm_unreachable("No such member"); +} + +// Return the list of all members of the code node. +NodeList CodeNode::members(const DataFlowGraph &G) const { + static auto True = [] (NodeAddr<NodeBase*>) -> bool { return true; }; + return members_if(True, G); +} + +// Return the owner of the given instr node. +NodeAddr<NodeBase*> InstrNode::getOwner(const DataFlowGraph &G) { + NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext()); + + while (NA.Addr != this) { + assert(NA.Addr->getType() == NodeAttrs::Code); + if (NA.Addr->getKind() == NodeAttrs::Block) + return NA; + NA = G.addr<NodeBase*>(NA.Addr->getNext()); + } + llvm_unreachable("No owner in circular list"); +} + +// Add the phi node PA to the given block node. +void BlockNode::addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G) { + auto M = getFirstMember(G); + if (M.Id == 0) { + addMember(PA, G); + return; + } + + assert(M.Addr->getType() == NodeAttrs::Code); + if (M.Addr->getKind() == NodeAttrs::Stmt) { + // If the first member of the block is a statement, insert the phi as + // the first member. + Code.FirstM = PA.Id; + PA.Addr->setNext(M.Id); + } else { + // If the first member is a phi, find the last phi, and append PA to it. + assert(M.Addr->getKind() == NodeAttrs::Phi); + NodeAddr<NodeBase*> MN = M; + do { + M = MN; + MN = G.addr<NodeBase*>(M.Addr->getNext()); + assert(MN.Addr->getType() == NodeAttrs::Code); + } while (MN.Addr->getKind() == NodeAttrs::Phi); + + // M is the last phi. 
+ addMemberAfter(M, PA, G); + } +} + +// Find the block node corresponding to the machine basic block BB in the +// given func node. +NodeAddr<BlockNode*> FuncNode::findBlock(const MachineBasicBlock *BB, + const DataFlowGraph &G) const { + auto EqBB = [BB] (NodeAddr<NodeBase*> NA) -> bool { + return NodeAddr<BlockNode*>(NA).Addr->getCode() == BB; + }; + NodeList Ms = members_if(EqBB, G); + if (!Ms.empty()) + return Ms[0]; + return NodeAddr<BlockNode*>(); +} + +// Get the block node for the entry block in the given function. +NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) { + MachineBasicBlock *EntryB = &getCode()->front(); + return findBlock(EntryB, G); +} + + +// Register aliasing information. +// +// In theory, the lane information could be used to determine register +// covering (and aliasing), but depending on the sub-register structure, +// the lane mask information may be missing. The covering information +// must be available for this framework to work, so relying solely on +// the lane data is not sufficient. + +// Determine whether RA covers RB. +bool RegisterAliasInfo::covers(RegisterRef RA, RegisterRef RB) const { + if (RA == RB) + return true; + if (TargetRegisterInfo::isVirtualRegister(RA.Reg)) { + assert(TargetRegisterInfo::isVirtualRegister(RB.Reg)); + if (RA.Reg != RB.Reg) + return false; + if (RA.Sub == 0) + return true; + return TRI.composeSubRegIndices(RA.Sub, RB.Sub) == RA.Sub; + } + + assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg) && + TargetRegisterInfo::isPhysicalRegister(RB.Reg)); + unsigned A = RA.Sub != 0 ? TRI.getSubReg(RA.Reg, RA.Sub) : RA.Reg; + unsigned B = RB.Sub != 0 ? TRI.getSubReg(RB.Reg, RB.Sub) : RB.Reg; + return TRI.isSubRegister(A, B); +} + +// Determine whether RR is covered by the set of references RRs. +bool RegisterAliasInfo::covers(const RegisterSet &RRs, RegisterRef RR) const { + if (RRs.count(RR)) + return true; + + // For virtual registers, we cannot accurately determine covering based + // on subregisters. If RR itself is not present in RRs, but it has a sub- + // register reference, check for the super-register alone. Otherwise, + // assume non-covering. + if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) { + if (RR.Sub != 0) + return RRs.count({RR.Reg, 0}); + return false; + } + + // If any super-register of RR is present, then RR is covered. + unsigned Reg = RR.Sub == 0 ? RR.Reg : TRI.getSubReg(RR.Reg, RR.Sub); + for (MCSuperRegIterator SR(Reg, &TRI); SR.isValid(); ++SR) + if (RRs.count({*SR, 0})) + return true; + + return false; +} + +// Get the list of references aliased to RR. +std::vector<RegisterRef> RegisterAliasInfo::getAliasSet(RegisterRef RR) const { + // Do not include RR in the alias set. For virtual registers return an + // empty set. + std::vector<RegisterRef> AS; + if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return AS; + assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg)); + unsigned R = RR.Reg; + if (RR.Sub) + R = TRI.getSubReg(RR.Reg, RR.Sub); + + for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI) + AS.push_back(RegisterRef({*AI, 0})); + return AS; +} + +// Check whether RA and RB are aliased. 
+bool RegisterAliasInfo::alias(RegisterRef RA, RegisterRef RB) const {
+  bool VirtA = TargetRegisterInfo::isVirtualRegister(RA.Reg);
+  bool VirtB = TargetRegisterInfo::isVirtualRegister(RB.Reg);
+  bool PhysA = TargetRegisterInfo::isPhysicalRegister(RA.Reg);
+  bool PhysB = TargetRegisterInfo::isPhysicalRegister(RB.Reg);
+
+  if (VirtA != VirtB)
+    return false;
+
+  if (VirtA) {
+    if (RA.Reg != RB.Reg)
+      return false;
+    // RA and RB refer to the same register. If either of them refers to the
+    // whole register, they must be aliased.
+    if (RA.Sub == 0 || RB.Sub == 0)
+      return true;
+    unsigned SA = TRI.getSubRegIdxSize(RA.Sub);
+    unsigned OA = TRI.getSubRegIdxOffset(RA.Sub);
+    unsigned SB = TRI.getSubRegIdxSize(RB.Sub);
+    unsigned OB = TRI.getSubRegIdxOffset(RB.Sub);
+    if (OA <= OB && OA+SA > OB)
+      return true;
+    if (OB <= OA && OB+SB > OA)
+      return true;
+    return false;
+  }
+
+  assert(PhysA && PhysB);
+  (void)PhysA, (void)PhysB;
+  unsigned A = RA.Sub ? TRI.getSubReg(RA.Reg, RA.Sub) : RA.Reg;
+  unsigned B = RB.Sub ? TRI.getSubReg(RB.Reg, RB.Sub) : RB.Reg;
+  for (MCRegAliasIterator I(A, &TRI, true); I.isValid(); ++I)
+    if (B == *I)
+      return true;
+  return false;
+}
+
+
+// Target operand information.
+//
+
+// For a given instruction, check if there are any bits of the register
+// defined in operand OpNum that can remain unchanged across this def.
+bool TargetOperandInfo::isPreserving(const MachineInstr &In, unsigned OpNum)
+      const {
+  return TII.isPredicated(&In);
+}
+
+// Check if the def in operand OpNum produces an unspecified value.
+bool TargetOperandInfo::isClobbering(const MachineInstr &In, unsigned OpNum)
+      const {
+  if (In.isCall())
+    if (In.getOperand(OpNum).isImplicit())
+      return true;
+  return false;
+}
+
+// Check if the given instruction requires operand OpNum to be a specific,
+// fixed physical register.
+bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
+      const {
+  if (In.isCall() || In.isReturn())
+    return true;
+  const MCInstrDesc &D = In.getDesc();
+  if (!D.getImplicitDefs() && !D.getImplicitUses())
+    return false;
+  const MachineOperand &Op = In.getOperand(OpNum);
+  // If there is a sub-register, treat the operand as non-fixed. Currently,
+  // fixed registers are those that are listed in the descriptor as implicit
+  // uses or defs, and those lists do not allow sub-registers.
+  if (Op.getSubReg() != 0)
+    return false;
+  unsigned Reg = Op.getReg();
+  const MCPhysReg *ImpR = Op.isDef() ? D.getImplicitDefs()
+                                     : D.getImplicitUses();
+  if (!ImpR)
+    return false;
+  while (*ImpR)
+    if (*ImpR++ == Reg)
+      return true;
+  return false;
+}
+
+
+//
+// The data flow graph construction.
+//
+
+DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
+      const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
+      const MachineDominanceFrontier &mdf, const RegisterAliasInfo &rai,
+      const TargetOperandInfo &toi)
+    : TimeG("rdf"), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), RAI(rai),
+      TOI(toi) {
+}
+
+
+// The implementation of the definition stack.
+// Each register reference has its own definition stack. In particular,
+// the register references "Reg" and "Reg:subreg" will each have their
+// own definition stacks.
+
+// Construct a stack iterator.
+DataFlowGraph::DefStack::Iterator::Iterator(const DataFlowGraph::DefStack &S,
+      bool Top) : DS(S) {
+  if (!Top) {
+    // Initialize to bottom.
+    Pos = 0;
+    return;
+  }
+  // Initialize to the top, i.e. top-most non-delimiter (or 0, if empty).
+  Pos = DS.Stack.size();
+  while (Pos > 0 && DS.isDelimiter(DS.Stack[Pos-1]))
+    Pos--;
+}
+
+// Return the size of the stack, including block delimiters.
+unsigned DataFlowGraph::DefStack::size() const {
+  unsigned S = 0;
+  for (auto I = top(), E = bottom(); I != E; I.down())
+    S++;
+  return S;
+}
+
+// Remove the top entry from the stack. Remove all intervening delimiters
+// so that after this, the stack is either empty, or the top of the stack
+// is a non-delimiter.
+void DataFlowGraph::DefStack::pop() {
+  assert(!empty());
+  unsigned P = nextDown(Stack.size());
+  Stack.resize(P);
+}
+
+// Push a delimiter for block node N on the stack.
+void DataFlowGraph::DefStack::start_block(NodeId N) {
+  assert(N != 0);
+  Stack.push_back(NodeAddr<DefNode*>(nullptr, N));
+}
+
+// Remove all nodes from the top of the stack, until the delimiter for
+// block node N is encountered. Remove the delimiter as well. In effect,
+// this will remove from the stack all definitions from block N.
+void DataFlowGraph::DefStack::clear_block(NodeId N) {
+  assert(N != 0);
+  unsigned P = Stack.size();
+  while (P > 0) {
+    bool Found = isDelimiter(Stack[P-1], N);
+    P--;
+    if (Found)
+      break;
+  }
+  // This will also remove the delimiter, if found.
+  Stack.resize(P);
+}
+
+// Move the stack iterator up by one.
+unsigned DataFlowGraph::DefStack::nextUp(unsigned P) const {
+  // Get the next valid position after P (skipping all delimiters).
+  // The input position P does not have to point to a non-delimiter.
+  unsigned SS = Stack.size();
+  bool IsDelim;
+  assert(P < SS);
+  do {
+    P++;
+    IsDelim = isDelimiter(Stack[P-1]);
+  } while (P < SS && IsDelim);
+  assert(!IsDelim);
+  return P;
+}
+
+// Move the stack iterator down by one.
+unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
+  // Get the preceding valid position before P (skipping all delimiters).
+  // The input position P does not have to point to a non-delimiter.
+  assert(P > 0 && P <= Stack.size());
+  bool IsDelim = isDelimiter(Stack[P-1]);
+  do {
+    if (--P == 0)
+      break;
+    IsDelim = isDelimiter(Stack[P-1]);
+  } while (P > 0 && IsDelim);
+  assert(!IsDelim);
+  return P;
+}
+
+// Node management functions.
+
+// Get the pointer to the node with the id N.
+NodeBase *DataFlowGraph::ptr(NodeId N) const {
+  if (N == 0)
+    return nullptr;
+  return Memory.ptr(N);
+}
+
+// Get the id of the node at the address P.
+NodeId DataFlowGraph::id(const NodeBase *P) const {
+  if (P == nullptr)
+    return 0;
+  return Memory.id(P);
+}
+
+// Allocate a new node and set the attributes to Attrs.
+NodeAddr<NodeBase*> DataFlowGraph::newNode(uint16_t Attrs) {
+  NodeAddr<NodeBase*> P = Memory.New();
+  P.Addr->init();
+  P.Addr->setAttrs(Attrs);
+  return P;
+}
+
+// Make a copy of the given node B, except for the data-flow links, which
+// are set to 0.
+NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
+  NodeAddr<NodeBase*> NA = newNode(0);
+  memcpy(NA.Addr, B.Addr, sizeof(NodeBase));
+  // Ref nodes need to have the data-flow links reset.
+  if (NA.Addr->getType() == NodeAttrs::Ref) {
+    NodeAddr<RefNode*> RA = NA;
+    RA.Addr->setReachingDef(0);
+    RA.Addr->setSibling(0);
+    if (NA.Addr->getKind() == NodeAttrs::Def) {
+      NodeAddr<DefNode*> DA = NA;
+      DA.Addr->setReachedDef(0);
+      DA.Addr->setReachedUse(0);
+    }
+  }
+  return NA;
+}
+
+
+// Allocation routines for specific node types/kinds.
+ +NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner, + MachineOperand &Op, uint16_t Flags) { + NodeAddr<UseNode*> UA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags); + UA.Addr->setRegRef(&Op); + return UA; +} + +NodeAddr<PhiUseNode*> DataFlowGraph::newPhiUse(NodeAddr<PhiNode*> Owner, + RegisterRef RR, NodeAddr<BlockNode*> PredB, uint16_t Flags) { + NodeAddr<PhiUseNode*> PUA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags); + assert(Flags & NodeAttrs::PhiRef); + PUA.Addr->setRegRef(RR); + PUA.Addr->setPredecessor(PredB.Id); + return PUA; +} + +NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner, + MachineOperand &Op, uint16_t Flags) { + NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags); + DA.Addr->setRegRef(&Op); + return DA; +} + +NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner, + RegisterRef RR, uint16_t Flags) { + NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags); + assert(Flags & NodeAttrs::PhiRef); + DA.Addr->setRegRef(RR); + return DA; +} + +NodeAddr<PhiNode*> DataFlowGraph::newPhi(NodeAddr<BlockNode*> Owner) { + NodeAddr<PhiNode*> PA = newNode(NodeAttrs::Code | NodeAttrs::Phi); + Owner.Addr->addPhi(PA, *this); + return PA; +} + +NodeAddr<StmtNode*> DataFlowGraph::newStmt(NodeAddr<BlockNode*> Owner, + MachineInstr *MI) { + NodeAddr<StmtNode*> SA = newNode(NodeAttrs::Code | NodeAttrs::Stmt); + SA.Addr->setCode(MI); + Owner.Addr->addMember(SA, *this); + return SA; +} + +NodeAddr<BlockNode*> DataFlowGraph::newBlock(NodeAddr<FuncNode*> Owner, + MachineBasicBlock *BB) { + NodeAddr<BlockNode*> BA = newNode(NodeAttrs::Code | NodeAttrs::Block); + BA.Addr->setCode(BB); + Owner.Addr->addMember(BA, *this); + return BA; +} + +NodeAddr<FuncNode*> DataFlowGraph::newFunc(MachineFunction *MF) { + NodeAddr<FuncNode*> FA = newNode(NodeAttrs::Code | NodeAttrs::Func); + FA.Addr->setCode(MF); + return FA; +} + +// Build the data flow graph. +void DataFlowGraph::build() { + reset(); + Func = newFunc(&MF); + + if (MF.empty()) + return; + + for (auto &B : MF) { + auto BA = newBlock(Func, &B); + for (auto &I : B) { + if (I.isDebugValue()) + continue; + buildStmt(BA, I); + } + } + + // Collect information about block references. + NodeAddr<BlockNode*> EA = Func.Addr->getEntryBlock(*this); + BlockRefsMap RefM; + buildBlockRefs(EA, RefM); + + // Add function-entry phi nodes. + MachineRegisterInfo &MRI = MF.getRegInfo(); + for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) { + NodeAddr<PhiNode*> PA = newPhi(EA); + RegisterRef RR = { I->first, 0 }; + uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving; + NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags); + PA.Addr->addMember(DA, *this); + } + + // Build a map "PhiM" which will contain, for each block, the set + // of references that will require phi definitions in that block. + BlockRefsMap PhiM; + auto Blocks = Func.Addr->members(*this); + for (NodeAddr<BlockNode*> BA : Blocks) + recordDefsForDF(PhiM, RefM, BA); + for (NodeAddr<BlockNode*> BA : Blocks) + buildPhis(PhiM, RefM, BA); + + // Link all the refs. This will recursively traverse the dominator tree. + DefStackMap DM; + linkBlockRefs(DM, EA); + + // Finally, remove all unused phi nodes. + removeUnusedPhis(); +} + +// For each stack in the map DefM, push the delimiter for block B on it. +void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) { + // Push block delimiters. 
+ for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I) + I->second.start_block(B); +} + +// Remove all definitions coming from block B from each stack in DefM. +void DataFlowGraph::releaseBlock(NodeId B, DefStackMap &DefM) { + // Pop all defs from this block from the definition stack. Defs that were + // added to the map during the traversal of instructions will not have a + // delimiter, but for those, the whole stack will be emptied. + for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I) + I->second.clear_block(B); + + // Finally, remove empty stacks from the map. + for (auto I = DefM.begin(), E = DefM.end(), NextI = I; I != E; I = NextI) { + NextI = std::next(I); + // This preserves the validity of iterators other than I. + if (I->second.empty()) + DefM.erase(I); + } +} + +// Push all definitions from the instruction node IA to an appropriate +// stack in DefM. +void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) { + NodeList Defs = IA.Addr->members_if(IsDef, *this); + NodeSet Visited; +#ifndef NDEBUG + RegisterSet Defined; +#endif + + // The important objectives of this function are: + // - to be able to handle instructions both while the graph is being + // constructed, and after the graph has been constructed, and + // - maintain proper ordering of definitions on the stack for each + // register reference: + // - if there are two or more related defs in IA (i.e. coming from + // the same machine operand), then only push one def on the stack, + // - if there are multiple unrelated defs of non-overlapping + // subregisters of S, then the stack for S will have both (in an + // unspecified order), but the order does not matter from the data- + // -flow perspective. + + for (NodeAddr<DefNode*> DA : Defs) { + if (Visited.count(DA.Id)) + continue; + NodeList Rel = getRelatedRefs(IA, DA); + NodeAddr<DefNode*> PDA = Rel.front(); + // Push the definition on the stack for the register and all aliases. + RegisterRef RR = PDA.Addr->getRegRef(); +#ifndef NDEBUG + // Assert if the register is defined in two or more unrelated defs. + // This could happen if there are two or more def operands defining it. + if (!Defined.insert(RR).second) { + auto *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); + dbgs() << "Multiple definitions of register: " + << Print<RegisterRef>(RR, *this) << " in\n " << *MI + << "in BB#" << MI->getParent()->getNumber() << '\n'; + llvm_unreachable(nullptr); + } +#endif + DefM[RR].push(DA); + for (auto A : RAI.getAliasSet(RR)) { + assert(A != RR); + DefM[A].push(DA); + } + // Mark all the related defs as visited. + for (auto T : Rel) + Visited.insert(T.Id); + } +} + +// Return the list of all reference nodes related to RA, including RA itself. +// See "getNextRelated" for the meaning of a "related reference". +NodeList DataFlowGraph::getRelatedRefs(NodeAddr<InstrNode*> IA, + NodeAddr<RefNode*> RA) const { + assert(IA.Id != 0 && RA.Id != 0); + + NodeList Refs; + NodeId Start = RA.Id; + do { + Refs.push_back(RA); + RA = getNextRelated(IA, RA); + } while (RA.Id != 0 && RA.Id != Start); + return Refs; +} + + +// Clear all information in the graph. +void DataFlowGraph::reset() { + Memory.clear(); + Func = NodeAddr<FuncNode*>(); +} + + +// Return the next reference node in the instruction node IA that is related +// to RA. Conceptually, two reference nodes are related if they refer to the +// same instance of a register access, but differ in flags or other minor +// characteristics. Specific examples of related nodes are shadow reference +// nodes. 
+// Return the equivalent of nullptr if there are no more related references.
+NodeAddr<RefNode*> DataFlowGraph::getNextRelated(NodeAddr<InstrNode*> IA,
+      NodeAddr<RefNode*> RA) const {
+  assert(IA.Id != 0 && RA.Id != 0);
+
+  auto Related = [RA](NodeAddr<RefNode*> TA) -> bool {
+    if (TA.Addr->getKind() != RA.Addr->getKind())
+      return false;
+    if (TA.Addr->getRegRef() != RA.Addr->getRegRef())
+      return false;
+    return true;
+  };
+  auto RelatedStmt = [&Related,RA](NodeAddr<RefNode*> TA) -> bool {
+    return Related(TA) &&
+           &RA.Addr->getOp() == &TA.Addr->getOp();
+  };
+  auto RelatedPhi = [&Related,RA](NodeAddr<RefNode*> TA) -> bool {
+    if (!Related(TA))
+      return false;
+    if (TA.Addr->getKind() != NodeAttrs::Use)
+      return true;
+    // For phi uses, compare predecessor blocks.
+    const NodeAddr<const PhiUseNode*> TUA = TA;
+    const NodeAddr<const PhiUseNode*> RUA = RA;
+    return TUA.Addr->getPredecessor() == RUA.Addr->getPredecessor();
+  };
+
+  RegisterRef RR = RA.Addr->getRegRef();
+  if (IA.Addr->getKind() == NodeAttrs::Stmt)
+    return RA.Addr->getNextRef(RR, RelatedStmt, true, *this);
+  return RA.Addr->getNextRef(RR, RelatedPhi, true, *this);
+}
+
+// Find the next node related to RA in IA that satisfies condition P.
+// If such a node was found, return a pair where the second element is the
+// located node. If such a node does not exist, return a pair where the
+// first element is the element after which such a node should be inserted,
+// and the second element is a null-address.
+template <typename Predicate>
+std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>>
+DataFlowGraph::locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
+      Predicate P) const {
+  assert(IA.Id != 0 && RA.Id != 0);
+
+  NodeAddr<RefNode*> NA;
+  NodeId Start = RA.Id;
+  while (true) {
+    NA = getNextRelated(IA, RA);
+    if (NA.Id == 0 || NA.Id == Start)
+      break;
+    if (P(NA))
+      break;
+    RA = NA;
+  }
+
+  if (NA.Id != 0 && NA.Id != Start)
+    return std::make_pair(RA, NA);
+  return std::make_pair(RA, NodeAddr<RefNode*>());
+}
+
+// Get the next shadow node in IA corresponding to RA, and optionally create
+// such a node if it does not exist.
+NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
+      NodeAddr<RefNode*> RA, bool Create) {
+  assert(IA.Id != 0 && RA.Id != 0);
+
+  uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
+  auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool {
+    return TA.Addr->getFlags() == Flags;
+  };
+  auto Loc = locateNextRef(IA, RA, IsShadow);
+  if (Loc.second.Id != 0 || !Create)
+    return Loc.second;
+
+  // Create a copy of RA and mark it as a shadow.
+  NodeAddr<RefNode*> NA = cloneNode(RA);
+  NA.Addr->setFlags(Flags | NodeAttrs::Shadow);
+  IA.Addr->addMemberAfter(Loc.first, NA, *this);
+  return NA;
+}
+
+// Get the next shadow node in IA corresponding to RA. Return null-address
+// if such a node does not exist.
+NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
+      NodeAddr<RefNode*> RA) const {
+  assert(IA.Id != 0 && RA.Id != 0);
+  uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
+  auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool {
+    return TA.Addr->getFlags() == Flags;
+  };
+  return locateNextRef(IA, RA, IsShadow).second;
+}
+
+// Create a new statement node in the block node BA that corresponds to
+// the machine instruction MI.
+void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
+  auto SA = newStmt(BA, &In);
+
+  // Collect a set of registers that this instruction implicitly uses
+  // or defines. 
Implicit operands from an instruction will be ignored + // unless they are listed here. + RegisterSet ImpUses, ImpDefs; + if (const uint16_t *ImpD = In.getDesc().getImplicitDefs()) + while (uint16_t R = *ImpD++) + ImpDefs.insert({R, 0}); + if (const uint16_t *ImpU = In.getDesc().getImplicitUses()) + while (uint16_t R = *ImpU++) + ImpUses.insert({R, 0}); + + bool IsCall = In.isCall(), IsReturn = In.isReturn(); + bool IsPredicated = TII.isPredicated(&In); + unsigned NumOps = In.getNumOperands(); + + // Avoid duplicate implicit defs. This will not detect cases of implicit + // defs that define registers that overlap, but it is not clear how to + // interpret that in the absence of explicit defs. Overlapping explicit + // defs are likely illegal already. + RegisterSet DoneDefs; + // Process explicit defs first. + for (unsigned OpN = 0; OpN < NumOps; ++OpN) { + MachineOperand &Op = In.getOperand(OpN); + if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) + continue; + RegisterRef RR = { Op.getReg(), Op.getSubReg() }; + uint16_t Flags = NodeAttrs::None; + if (TOI.isPreserving(In, OpN)) + Flags |= NodeAttrs::Preserving; + if (TOI.isClobbering(In, OpN)) + Flags |= NodeAttrs::Clobbering; + if (TOI.isFixedReg(In, OpN)) + Flags |= NodeAttrs::Fixed; + NodeAddr<DefNode*> DA = newDef(SA, Op, Flags); + SA.Addr->addMember(DA, *this); + DoneDefs.insert(RR); + } + + // Process implicit defs, skipping those that have already been added + // as explicit. + for (unsigned OpN = 0; OpN < NumOps; ++OpN) { + MachineOperand &Op = In.getOperand(OpN); + if (!Op.isReg() || !Op.isDef() || !Op.isImplicit()) + continue; + RegisterRef RR = { Op.getReg(), Op.getSubReg() }; + if (!IsCall && !ImpDefs.count(RR)) + continue; + if (DoneDefs.count(RR)) + continue; + uint16_t Flags = NodeAttrs::None; + if (TOI.isPreserving(In, OpN)) + Flags |= NodeAttrs::Preserving; + if (TOI.isClobbering(In, OpN)) + Flags |= NodeAttrs::Clobbering; + if (TOI.isFixedReg(In, OpN)) + Flags |= NodeAttrs::Fixed; + NodeAddr<DefNode*> DA = newDef(SA, Op, Flags); + SA.Addr->addMember(DA, *this); + DoneDefs.insert(RR); + } + + for (unsigned OpN = 0; OpN < NumOps; ++OpN) { + MachineOperand &Op = In.getOperand(OpN); + if (!Op.isReg() || !Op.isUse()) + continue; + RegisterRef RR = { Op.getReg(), Op.getSubReg() }; + // Add implicit uses on return and call instructions, and on predicated + // instructions regardless of whether or not they appear in the instruction + // descriptor's list. + bool Implicit = Op.isImplicit(); + bool TakeImplicit = IsReturn || IsCall || IsPredicated; + if (Implicit && !TakeImplicit && !ImpUses.count(RR)) + continue; + uint16_t Flags = NodeAttrs::None; + if (TOI.isFixedReg(In, OpN)) + Flags |= NodeAttrs::Fixed; + NodeAddr<UseNode*> UA = newUse(SA, Op, Flags); + SA.Addr->addMember(UA, *this); + } +} + +// Build a map that for each block will have the set of all references from +// that block, and from all blocks dominated by it. 
+void DataFlowGraph::buildBlockRefs(NodeAddr<BlockNode*> BA,
+      BlockRefsMap &RefM) {
+  auto &Refs = RefM[BA.Id];
+  MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
+  assert(N);
+  for (auto I : *N) {
+    MachineBasicBlock *SB = I->getBlock();
+    auto SBA = Func.Addr->findBlock(SB, *this);
+    buildBlockRefs(SBA, RefM);
+    const auto &SRs = RefM[SBA.Id];
+    Refs.insert(SRs.begin(), SRs.end());
+  }
+
+  for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
+    for (NodeAddr<RefNode*> RA : IA.Addr->members(*this))
+      Refs.insert(RA.Addr->getRegRef());
+}
+
+// Scan all defs in the block node BA and record in PhiM the locations of
+// phi nodes corresponding to these defs.
+void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM,
+      NodeAddr<BlockNode*> BA) {
+  // Check all defs from block BA and record them in each block in BA's
+  // iterated dominance frontier. This information will later be used to
+  // create phi nodes.
+  MachineBasicBlock *BB = BA.Addr->getCode();
+  assert(BB);
+  auto DFLoc = MDF.find(BB);
+  if (DFLoc == MDF.end() || DFLoc->second.empty())
+    return;
+
+  // Traverse all instructions in the block and collect the set of all
+  // defined references. For each reference there will be a phi created
+  // in the block's iterated dominance frontier.
+  // This is done to make sure that each defined reference gets only one
+  // phi node, even if it is defined multiple times.
+  RegisterSet Defs;
+  for (auto I : BA.Addr->members(*this)) {
+    assert(I.Addr->getType() == NodeAttrs::Code);
+    assert(I.Addr->getKind() == NodeAttrs::Phi ||
+           I.Addr->getKind() == NodeAttrs::Stmt);
+    NodeAddr<InstrNode*> IA = I;
+    for (NodeAddr<RefNode*> RA : IA.Addr->members_if(IsDef, *this))
+      Defs.insert(RA.Addr->getRegRef());
+  }
+
+  // Finally, add the set of defs to each block in the iterated dominance
+  // frontier.
+  const MachineDominanceFrontier::DomSetType &DF = DFLoc->second;
+  SetVector<MachineBasicBlock*> IDF(DF.begin(), DF.end());
+  for (unsigned i = 0; i < IDF.size(); ++i) {
+    auto F = MDF.find(IDF[i]);
+    if (F != MDF.end())
+      IDF.insert(F->second.begin(), F->second.end());
+  }
+
+  // Get the register references that are reachable from this block.
+  RegisterSet &Refs = RefM[BA.Id];
+  for (auto DB : IDF) {
+    auto DBA = Func.Addr->findBlock(DB, *this);
+    const auto &Rs = RefM[DBA.Id];
+    Refs.insert(Rs.begin(), Rs.end());
+  }
+
+  for (auto DB : IDF) {
+    auto DBA = Func.Addr->findBlock(DB, *this);
+    PhiM[DBA.Id].insert(Defs.begin(), Defs.end());
+  }
+}
+
+// Given the locations of phi nodes in the map PhiM, create the phi nodes
+// that are located in the block node BA.
+void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
+      NodeAddr<BlockNode*> BA) {
+  // Check if this block has any DF defs, i.e. if there are any defs
+  // that this block is in the iterated dominance frontier of.
+  auto HasDF = PhiM.find(BA.Id);
+  if (HasDF == PhiM.end() || HasDF->second.empty())
+    return;
+
+  // First, remove all R in Refs such that there exists T in Refs that
+  // covers R. In other words, only leave those refs that are not covered
+  // by another ref (i.e. maximal with respect to covering). 
+
+  auto MaxCoverIn = [this] (RegisterRef RR, RegisterSet &RRs) -> RegisterRef {
+    for (auto I : RRs)
+      if (I != RR && RAI.covers(I, RR))
+        RR = I;
+    return RR;
+  };
+
+  RegisterSet MaxDF;
+  for (auto I : HasDF->second)
+    MaxDF.insert(MaxCoverIn(I, HasDF->second));
+
+  std::vector<RegisterRef> MaxRefs;
+  auto &RefB = RefM[BA.Id];
+  for (auto I : MaxDF)
+    MaxRefs.push_back(MaxCoverIn(I, RefB));
+
+  // Now, for each R in MaxRefs, get the alias closure of R. If the closure
+  // only has R in it, create a phi with a def for R. Otherwise, create a
+  // phi, and add a def for each S in the closure.
+
+  // Sort the refs so that the phis will be created in a deterministic order.
+  std::sort(MaxRefs.begin(), MaxRefs.end());
+  // Remove duplicates.
+  auto NewEnd = std::unique(MaxRefs.begin(), MaxRefs.end());
+  MaxRefs.erase(NewEnd, MaxRefs.end());
+
+  auto Aliased = [this,&MaxRefs](RegisterRef RR,
+                                 std::vector<unsigned> &Closure) -> bool {
+    for (auto I : Closure)
+      if (RAI.alias(RR, MaxRefs[I]))
+        return true;
+    return false;
+  };
+
+  // Prepare a list of NodeIds of the block's predecessors.
+  std::vector<NodeId> PredList;
+  const MachineBasicBlock *MBB = BA.Addr->getCode();
+  for (auto PB : MBB->predecessors()) {
+    auto B = Func.Addr->findBlock(PB, *this);
+    PredList.push_back(B.Id);
+  }
+
+  while (!MaxRefs.empty()) {
+    // Put the first element in the closure, and then add all subsequent
+    // elements from MaxRefs to it, if they alias at least one element
+    // already in the closure.
+    // ClosureIdx: vector of indices in MaxRefs of members of the closure.
+    std::vector<unsigned> ClosureIdx = { 0 };
+    for (unsigned i = 1; i != MaxRefs.size(); ++i)
+      if (Aliased(MaxRefs[i], ClosureIdx))
+        ClosureIdx.push_back(i);
+
+    // Build a phi for the closure.
+    unsigned CS = ClosureIdx.size();
+    NodeAddr<PhiNode*> PA = newPhi(BA);
+
+    // Add defs.
+    for (unsigned X = 0; X != CS; ++X) {
+      RegisterRef RR = MaxRefs[ClosureIdx[X]];
+      uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
+      NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
+      PA.Addr->addMember(DA, *this);
+    }
+    // Add phi uses.
+    for (auto P : PredList) {
+      auto PBA = addr<BlockNode*>(P);
+      for (unsigned X = 0; X != CS; ++X) {
+        RegisterRef RR = MaxRefs[ClosureIdx[X]];
+        NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA);
+        PA.Addr->addMember(PUA, *this);
+      }
+    }
+
+    // Erase from MaxRefs all elements in the closure.
+    auto Begin = MaxRefs.begin();
+    for (unsigned i = ClosureIdx.size(); i != 0; --i)
+      MaxRefs.erase(Begin + ClosureIdx[i-1]);
+  }
+}
+
+// Remove any unneeded phi nodes that were created during the build process.
+void DataFlowGraph::removeUnusedPhis() {
+  // This will remove unused phis, i.e. phis where each def does not reach
+  // any uses or other defs. This will not detect or remove circular phi
+  // chains that are otherwise dead. Unused/dead phis are created during
+  // the build process and this function is intended to remove these cases
+  // that are easily determinable to be unnecessary.
+
+  SetVector<NodeId> PhiQ;
+  for (NodeAddr<BlockNode*> BA : Func.Addr->members(*this)) {
+    for (auto P : BA.Addr->members_if(IsPhi, *this))
+      PhiQ.insert(P.Id);
+  }
+
+  static auto HasUsedDef = [](NodeList &Ms) -> bool {
+    for (auto M : Ms) {
+      if (M.Addr->getKind() != NodeAttrs::Def)
+        continue;
+      NodeAddr<DefNode*> DA = M;
+      if (DA.Addr->getReachedDef() != 0 || DA.Addr->getReachedUse() != 0)
+        return true;
+    }
+    return false;
+  };
+
+  // Any phi, if it is removed, may affect other phis (make them dead). 
+ // For each removed phi, collect the potentially affected phis and add + // them back to the queue. + while (!PhiQ.empty()) { + auto PA = addr<PhiNode*>(PhiQ[0]); + PhiQ.remove(PA.Id); + NodeList Refs = PA.Addr->members(*this); + if (HasUsedDef(Refs)) + continue; + for (NodeAddr<RefNode*> RA : Refs) { + if (NodeId RD = RA.Addr->getReachingDef()) { + auto RDA = addr<DefNode*>(RD); + NodeAddr<InstrNode*> OA = RDA.Addr->getOwner(*this); + if (IsPhi(OA)) + PhiQ.insert(OA.Id); + } + if (RA.Addr->isDef()) + unlinkDef(RA); + else + unlinkUse(RA); + } + NodeAddr<BlockNode*> BA = PA.Addr->getOwner(*this); + BA.Addr->removeMember(PA, *this); + } +} + +// For a given reference node TA in an instruction node IA, connect the +// reaching def of TA to the appropriate def node. Create any shadow nodes +// as appropriate. +template <typename T> +void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA, + DefStack &DS) { + if (DS.empty()) + return; + RegisterRef RR = TA.Addr->getRegRef(); + NodeAddr<T> TAP; + + // References from the def stack that have been examined so far. + RegisterSet Defs; + + for (auto I = DS.top(), E = DS.bottom(); I != E; I.down()) { + RegisterRef QR = I->Addr->getRegRef(); + auto AliasQR = [QR,this] (RegisterRef RR) -> bool { + return RAI.alias(QR, RR); + }; + bool PrecUp = RAI.covers(QR, RR); + // Skip all defs that are aliased to any of the defs that we have already + // seen. If we encounter a covering def, stop the stack traversal early. + if (std::any_of(Defs.begin(), Defs.end(), AliasQR)) { + if (PrecUp) + break; + continue; + } + // The reaching def. + NodeAddr<DefNode*> RDA = *I; + + // Pick the reached node. + if (TAP.Id == 0) { + TAP = TA; + } else { + // Mark the existing ref as "shadow" and create a new shadow. + TAP.Addr->setFlags(TAP.Addr->getFlags() | NodeAttrs::Shadow); + TAP = getNextShadow(IA, TAP, true); + } + + // Create the link. + TAP.Addr->linkToDef(TAP.Id, RDA); + + if (PrecUp) + break; + Defs.insert(QR); + } +} + +// Create data-flow links for all reference nodes in the statement node SA. +void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA) { + RegisterSet Defs; + + // Link all nodes (upwards in the data-flow) with their reaching defs. + for (NodeAddr<RefNode*> RA : SA.Addr->members(*this)) { + uint16_t Kind = RA.Addr->getKind(); + assert(Kind == NodeAttrs::Def || Kind == NodeAttrs::Use); + RegisterRef RR = RA.Addr->getRegRef(); + // Do not process multiple defs of the same reference. + if (Kind == NodeAttrs::Def && Defs.count(RR)) + continue; + Defs.insert(RR); + + auto F = DefM.find(RR); + if (F == DefM.end()) + continue; + DefStack &DS = F->second; + if (Kind == NodeAttrs::Use) + linkRefUp<UseNode*>(SA, RA, DS); + else if (Kind == NodeAttrs::Def) + linkRefUp<DefNode*>(SA, RA, DS); + else + llvm_unreachable("Unexpected node in instruction"); + } +} + +// Create data-flow links for all instructions in the block node BA. This +// will include updating any phi nodes in BA. +void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) { + // Push block delimiters. + markBlock(BA.Id, DefM); + + // For each non-phi instruction in the block, link all the defs and uses + // to their reaching defs. For any member of the block (including phis), + // push the defs on the corresponding stacks. + for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this)) { + // Ignore phi nodes here. They will be linked part by part from the + // predecessors. 
+ if (IA.Addr->getKind() == NodeAttrs::Stmt) + linkStmtRefs(DefM, IA); + + // Push the definitions on the stack. + pushDefs(IA, DefM); + } + + // Recursively process all children in the dominator tree. + MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode()); + for (auto I : *N) { + MachineBasicBlock *SB = I->getBlock(); + auto SBA = Func.Addr->findBlock(SB, *this); + linkBlockRefs(DefM, SBA); + } + + // Link the phi uses from the successor blocks. + auto IsUseForBA = [BA](NodeAddr<NodeBase*> NA) -> bool { + if (NA.Addr->getKind() != NodeAttrs::Use) + return false; + assert(NA.Addr->getFlags() & NodeAttrs::PhiRef); + NodeAddr<PhiUseNode*> PUA = NA; + return PUA.Addr->getPredecessor() == BA.Id; + }; + MachineBasicBlock *MBB = BA.Addr->getCode(); + for (auto SB : MBB->successors()) { + auto SBA = Func.Addr->findBlock(SB, *this); + for (NodeAddr<InstrNode*> IA : SBA.Addr->members_if(IsPhi, *this)) { + // Go over each phi use associated with MBB, and link it. + for (auto U : IA.Addr->members_if(IsUseForBA, *this)) { + NodeAddr<PhiUseNode*> PUA = U; + RegisterRef RR = PUA.Addr->getRegRef(); + linkRefUp<UseNode*>(IA, PUA, DefM[RR]); + } + } + } + + // Pop all defs from this block from the definition stacks. + releaseBlock(BA.Id, DefM); +} + +// Remove the use node UA from any data-flow and structural links. +void DataFlowGraph::unlinkUse(NodeAddr<UseNode*> UA) { + NodeId RD = UA.Addr->getReachingDef(); + NodeId Sib = UA.Addr->getSibling(); + + NodeAddr<InstrNode*> IA = UA.Addr->getOwner(*this); + IA.Addr->removeMember(UA, *this); + + if (RD == 0) { + assert(Sib == 0); + return; + } + + auto RDA = addr<DefNode*>(RD); + auto TA = addr<UseNode*>(RDA.Addr->getReachedUse()); + if (TA.Id == UA.Id) { + RDA.Addr->setReachedUse(Sib); + return; + } + + while (TA.Id != 0) { + NodeId S = TA.Addr->getSibling(); + if (S == UA.Id) { + TA.Addr->setSibling(UA.Addr->getSibling()); + return; + } + TA = addr<UseNode*>(S); + } +} + +// Remove the def node DA from any data-flow and structural links. +void DataFlowGraph::unlinkDef(NodeAddr<DefNode*> DA) { + // + // RD + // | reached + // | def + // : + // . + // +----+ + // ... -- | DA | -- ... -- 0 : sibling chain of DA + // +----+ + // | | reached + // | : def + // | . + // | ... : Siblings (defs) + // | + // : reached + // . use + // ... : sibling chain of reached uses + + NodeId RD = DA.Addr->getReachingDef(); + + // Visit all siblings of the reached def and reset their reaching defs. + // Also, defs reached by DA are now "promoted" to being reached by RD, + // so all of them will need to be spliced into the sibling chain where + // DA belongs. + auto getAllNodes = [this] (NodeId N) -> NodeList { + NodeList Res; + while (N) { + auto RA = addr<RefNode*>(N); + // Keep the nodes in the exact sibling order. + Res.push_back(RA); + N = RA.Addr->getSibling(); + } + return Res; + }; + NodeList ReachedDefs = getAllNodes(DA.Addr->getReachedDef()); + NodeList ReachedUses = getAllNodes(DA.Addr->getReachedUse()); + + if (RD == 0) { + for (NodeAddr<RefNode*> I : ReachedDefs) + I.Addr->setSibling(0); + for (NodeAddr<RefNode*> I : ReachedUses) + I.Addr->setSibling(0); + } + for (NodeAddr<DefNode*> I : ReachedDefs) + I.Addr->setReachingDef(RD); + for (NodeAddr<UseNode*> I : ReachedUses) + I.Addr->setReachingDef(RD); + + NodeId Sib = DA.Addr->getSibling(); + if (RD == 0) { + assert(Sib == 0); + return; + } + + // Update the reaching def node and remove DA from the sibling list. 
+ auto RDA = addr<DefNode*>(RD); + auto TA = addr<DefNode*>(RDA.Addr->getReachedDef()); + if (TA.Id == DA.Id) { + // If DA is the first reached def, just update the RD's reached def + // to the DA's sibling. + RDA.Addr->setReachedDef(Sib); + } else { + // Otherwise, traverse the sibling list of the reached defs and remove + // DA from it. + while (TA.Id != 0) { + NodeId S = TA.Addr->getSibling(); + if (S == DA.Id) { + TA.Addr->setSibling(Sib); + break; + } + TA = addr<DefNode*>(S); + } + } + + // Splice the DA's reached defs into the RDA's reached def chain. + if (!ReachedDefs.empty()) { + auto Last = NodeAddr<DefNode*>(ReachedDefs.back()); + Last.Addr->setSibling(RDA.Addr->getReachedDef()); + RDA.Addr->setReachedDef(ReachedDefs.front().Id); + } + // Splice the DA's reached uses into the RDA's reached use chain. + if (!ReachedUses.empty()) { + auto Last = NodeAddr<UseNode*>(ReachedUses.back()); + Last.Addr->setSibling(RDA.Addr->getReachedUse()); + RDA.Addr->setReachedUse(ReachedUses.front().Id); + } + + NodeAddr<InstrNode*> IA = DA.Addr->getOwner(*this); + IA.Addr->removeMember(DA, *this); +} diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h new file mode 100644 index 0000000..7da7bb5 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h @@ -0,0 +1,841 @@ +//===--- RDFGraph.h -------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Target-independent, SSA-based data flow graph for register data flow (RDF) +// for a non-SSA program representation (e.g. post-RA machine code). +// +// +// *** Introduction +// +// The RDF graph is a collection of nodes, each of which denotes some element +// of the program. There are two main types of such elements: code and refe- +// rences. Conceptually, "code" is something that represents the structure +// of the program, e.g. basic block or a statement, while "reference" is an +// instance of accessing a register, e.g. a definition or a use. Nodes are +// connected with each other based on the structure of the program (such as +// blocks, instructions, etc.), and based on the data flow (e.g. reaching +// definitions, reached uses, etc.). The single-reaching-definition principle +// of SSA is generally observed, although, due to the non-SSA representation +// of the program, there are some differences between the graph and a "pure" +// SSA representation. +// +// +// *** Implementation remarks +// +// Since the graph can contain a large number of nodes, memory consumption +// was one of the major design considerations. As a result, there is a single +// base class NodeBase which defines all members used by all possible derived +// classes. The members are arranged in a union, and a derived class cannot +// add any data members of its own. Each derived class only defines the +// functional interface, i.e. member functions. NodeBase must be a POD, +// which implies that all of its members must also be PODs. +// Since nodes need to be connected with other nodes, pointers have been +// replaced with 32-bit identifiers: each node has an id of type NodeId. +// There are mapping functions in the graph that translate between actual +// memory addresses and the corresponding identifiers. +// A node id of 0 is equivalent to nullptr. 
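To make the id scheme above concrete, here is a freestanding toy (not the patch's allocator; DemoNode and DemoPool are invented names for illustration) showing how 1-based ids over flat storage let id 0 stand in for nullptr:

  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Toy illustration only, NOT the real NodeAllocator below.
  struct DemoNode { uint32_t Next = 0; };

  struct DemoPool {
    std::vector<DemoNode> Storage;
    uint32_t id(const DemoNode *P) const {   // address -> id
      return P ? static_cast<uint32_t>(P - Storage.data()) + 1 : 0;
    }
    DemoNode *ptr(uint32_t Id) {             // id -> address
      return Id ? &Storage[Id - 1] : nullptr;
    }
  };

  int main() {
    DemoPool Pool;
    Pool.Storage.resize(4);
    DemoNode *N = Pool.ptr(3);
    assert(Pool.id(N) == 3);
    assert(Pool.ptr(0) == nullptr);  // id 0 plays the role of nullptr
    // Caveat: vector storage can move on growth; the real allocator keeps
    // nodes in fixed-size blocks so addresses stay stable.
    return 0;
  }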
+// +// +// *** Structure of the graph +// +// A code node is always a collection of other nodes. For example, a code +// node corresponding to a basic block will contain code nodes corresponding +// to instructions. In turn, a code node corresponding to an instruction will +// contain a list of reference nodes that correspond to the definitions and +// uses of registers in that instruction. The members are arranged into a +// circular list, which is yet another consequence of the effort to save +// memory: for each member node it should be possible to obtain its owner, +// and it should be possible to access all other members. There are other +// ways to accomplish that, but the circular list seemed the most natural. +// +// +- CodeNode -+ +// | | <---------------------------------------------------+ +// +-+--------+-+ | +// |FirstM |LastM | +// | +-------------------------------------+ | +// | | | +// V V | +// +----------+ Next +----------+ Next Next +----------+ Next | +// | |----->| |-----> ... ----->| |----->-+ +// +- Member -+ +- Member -+ +- Member -+ +// +// The order of members is such that related reference nodes (see below) +// should be contiguous on the member list. +// +// A reference node is a node that encapsulates an access to a register, +// in other words, data flowing into or out of a register. There are two +// major kinds of reference nodes: defs and uses. A def node will contain +// the id of the first reached use, and the id of the first reached def. +// Each def and use will contain the id of the reaching def, and also the +// id of the next reached def (for def nodes) or use (for use nodes). +// The "next node sharing the same reaching def" is denoted as "sibling". +// In summary: +// - Def node contains: reaching def, sibling, first reached def, and first +// reached use. +// - Use node contains: reaching def and sibling. +// +// +-- DefNode --+ +// | R2 = ... | <---+--------------------+ +// ++---------+--+ | | +// |Reached |Reached | | +// |Def |Use | | +// | | |Reaching |Reaching +// | V |Def |Def +// | +-- UseNode --+ Sib +-- UseNode --+ Sib Sib +// | | ... = R2 |----->| ... = R2 |----> ... ----> 0 +// | +-------------+ +-------------+ +// V +// +-- DefNode --+ Sib +// | R2 = ... |----> ... +// ++---------+--+ +// | | +// | | +// ... ... +// +// To get a full picture, the circular lists connecting blocks within a +// function, instructions within a block, etc. should be superimposed with +// the def-def, def-use links shown above. +// To illustrate this, consider a small example in a pseudo-assembly: +// foo: +// add r2, r0, r1 ; r2 = r0+r1 +// addi r0, r2, 1 ; r0 = r2+1 +// ret r0 ; return value in r0 +// +// The graph (in a format used by the debugging functions) would look like: +// +// DFG dump:[ +// f1: Function foo +// b2: === BB#0 === preds(0), succs(0): +// p3: phi [d4<r0>(,d12,u9):] +// p5: phi [d6<r1>(,,u10):] +// s7: add [d8<r2>(,,u13):, u9<r0>(d4):, u10<r1>(d6):] +// s11: addi [d12<r0>(d4,,u15):, u13<r2>(d8):] +// s14: ret [u15<r0>(d12):] +// ] +// +// The f1, b2, p3, etc. are node ids. The letter is prepended to indicate the +// kind of the node (i.e. f - function, b - basic block, p - phi, s - state- +// ment, d - def, u - use). +// The format of a def node is: +// dN<R>(rd,d,u):sib, +// where +// N - numeric node id, +// R - register being defined +// rd - reaching def, +// d - reached def, +// u - reached use, +// sib - sibling. 
+// The format of a use node is:
+//   uN<R>[!](rd):sib,
+// where
+//   N   - numeric node id,
+//   R   - register being used,
+//   rd  - reaching def,
+//   sib - sibling.
+// Possible annotations (usually preceding the node id):
+//   + - preserving def,
+//   ~ - clobbering def,
+//   " - shadow ref (follows the node id),
+//   ! - fixed register (appears after register name).
+//
+// The circular lists are not explicit in the dump.
+//
+//
+// *** Node attributes
+//
+// NodeBase has a member "Attrs", which is the primary way of determining
+// the node's characteristics. The fields in this member decide whether
+// the node is a code node or a reference node (i.e. node's "type"), then
+// within each type, the "kind" determines what specifically this node
+// represents. The remaining bits, "flags", contain additional information
+// that is even more detailed than the "kind".
+// CodeNode's kinds are:
+// - Phi:   Phi node, members are reference nodes.
+// - Stmt:  Statement, members are reference nodes.
+// - Block: Basic block, members are instruction nodes (i.e. Phi or Stmt).
+// - Func:  The whole function. The members are basic block nodes.
+// RefNode's kinds are:
+// - Use.
+// - Def.
+//
+// Meaning of flags:
+// - Preserving: applies only to defs. A preserving def is one that can
+//   preserve some of the original bits among those that are included in
+//   the register associated with that def. For example, if R0 is a 32-bit
+//   register, but a def can only change the lower 16 bits, then it will
+//   be marked as preserving.
+// - Shadow: a reference that has duplicates holding additional reaching
+//   defs (see more below).
+// - Clobbering: applies only to defs, indicates that the value generated
+//   by this def is unspecified. A typical example would be volatile
+//   registers after function calls.
+//
+//
+// *** Shadow references
+//
+// It may happen that a super-register can have two (or more) non-overlapping
+// sub-registers. When both of these sub-registers are defined and followed
+// by a use of the super-register, the use of the super-register will not
+// have a unique reaching def: both defs of the sub-registers need to be
+// accounted for. In such cases, a duplicate use of the super-register is
+// added and it points to the extra reaching def. Both uses are marked with
+// a flag "shadow". Example:
+// Assume t0 is a super-register of r0 and r1, r0 and r1 do not overlap:
+//   set r0, 1        ; r0 = 1
+//   set r1, 1        ; r1 = 1
+//   addi t1, t0, 1   ; t1 = t0+1
+//
+// The DFG:
+//   s1: set [d2<r0>(,,u7):]
+//   s3: set [d4<r1>(,,u8):]
+//   s5: addi [d6<t1>(,,):, u7"<t0>(d2):, u8"<t0>(d4):]
+//
+// The statement s5 has two use nodes for t0: u7" and u8". The quotation
+// mark " indicates that the node is a shadow. 
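As a quick sanity check of the attribute packing just described, the following freestanding snippet replays the type/kind/flag masking using the same constants that the NodeAttrs definition later in this header declares (a sketch for illustration only):

  #include <cassert>
  #include <cstdint>

  int main() {
    // Masks and values copied from NodeAttrs below:
    // type in bits 0-1, kind in bits 2-4, flags in bits 5-9.
    const uint16_t TypeMask = 0x0003, Ref    = 0x0002;
    const uint16_t KindMask = 0x0007 << 2, Use = 0x0002 << 2;
    const uint16_t FlagMask = 0x001F << 5, Shadow = 0x0001 << 5;

    uint16_t Attrs = Ref | Use | Shadow;   // a shadow use such as u7" above
    assert((Attrs & TypeMask) == Ref);     // what NodeAttrs::type() extracts
    assert((Attrs & KindMask) == Use);     // what NodeAttrs::kind() extracts
    assert((Attrs & FlagMask) == Shadow);  // what NodeAttrs::flags() extracts
    return 0;
  }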
+//
+#ifndef RDF_GRAPH_H
+#define RDF_GRAPH_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+#include <functional>
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+  class MachineBasicBlock;
+  class MachineFunction;
+  class MachineInstr;
+  class MachineOperand;
+  class MachineDominanceFrontier;
+  class MachineDominatorTree;
+  class TargetInstrInfo;
+  class TargetRegisterInfo;
+}
+
+namespace rdf {
+  typedef uint32_t NodeId;
+
+  struct NodeAttrs {
+    enum : uint16_t {
+      None       = 0x0000,      // Nothing
+
+      // Types: 2 bits
+      TypeMask   = 0x0003,
+      Code       = 0x0001,      // 01, Container
+      Ref        = 0x0002,      // 10, Reference
+
+      // Kind: 3 bits
+      KindMask   = 0x0007 << 2,
+      Def        = 0x0001 << 2, // 001
+      Use        = 0x0002 << 2, // 010
+      Phi        = 0x0003 << 2, // 011
+      Stmt       = 0x0004 << 2, // 100
+      Block      = 0x0005 << 2, // 101
+      Func       = 0x0006 << 2, // 110
+
+      // Flags: 5 bits for now
+      FlagMask   = 0x001F << 5,
+      Shadow     = 0x0001 << 5, // 00001, Has extra reaching defs.
+      Clobbering = 0x0002 << 5, // 00010, Produces unspecified values.
+      PhiRef     = 0x0004 << 5, // 00100, Member of PhiNode.
+      Preserving = 0x0008 << 5, // 01000, Def can keep original bits.
+      Fixed      = 0x0010 << 5, // 10000, Fixed register.
+    };
+
+    static uint16_t type(uint16_t T)  { return T & TypeMask; }
+    static uint16_t kind(uint16_t T)  { return T & KindMask; }
+    static uint16_t flags(uint16_t T) { return T & FlagMask; }
+
+    static uint16_t set_type(uint16_t A, uint16_t T) {
+      return (A & ~TypeMask) | T;
+    }
+    static uint16_t set_kind(uint16_t A, uint16_t K) {
+      return (A & ~KindMask) | K;
+    }
+    static uint16_t set_flags(uint16_t A, uint16_t F) {
+      return (A & ~FlagMask) | F;
+    }
+
+    // Test if A contains B.
+    static bool contains(uint16_t A, uint16_t B) {
+      if (type(A) != Code)
+        return false;
+      uint16_t KB = kind(B);
+      switch (kind(A)) {
+      case Func:
+        return KB == Block;
+      case Block:
+        return KB == Phi || KB == Stmt;
+      case Phi:
+      case Stmt:
+        return type(B) == Ref;
+      }
+      return false;
+    }
+  };
+
+  template <typename T> struct NodeAddr {
+    NodeAddr() : Addr(nullptr), Id(0) {}
+    NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
+    NodeAddr(const NodeAddr&) = default;
+    NodeAddr &operator= (const NodeAddr&) = default;
+
+    bool operator== (const NodeAddr<T> &NA) const {
+      assert((Addr == NA.Addr) == (Id == NA.Id));
+      return Addr == NA.Addr;
+    }
+    bool operator!= (const NodeAddr<T> &NA) const {
+      return !operator==(NA);
+    }
+    // Type cast (casting constructor). This is the reason for having this
+    // class instead of std::pair.
+    template <typename S> NodeAddr(const NodeAddr<S> &NA)
+      : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
+
+    T Addr;
+    NodeId Id;
+  };
+
+  struct NodeBase;
+
+  // Fast memory allocation and translation between node id and node address.
+  // This is really the same idea as the one underlying the "bump pointer
+  // allocator", the difference being in the translation. A node id is
+  // composed of two components: the index of the block in which it was
+  // allocated, and the index within the block. With the default settings,
+  // where the number of nodes per block is 4096, the node id (minus 1) is:
+  //
+  // bit position:                11             0
+  // +----------------------------+--------------+
+  // | Index of the block         |Index in block|
+  // +----------------------------+--------------+
+  //
+  // The actual node id is the above plus 1, to avoid creating a node id of 0.
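+  //
+  // For example, with the default 4096 nodes per block, node id 5000
+  // denotes N-1 = 4999, i.e. block 4999 >> 12 = 1 and index
+  // 4999 & 4095 = 903 within that block (byte offset 903 * 32, given
+  // the 32-byte node size used by the allocator below).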
+ // + // This method significantly improved the build time, compared to using maps + // (std::unordered_map or DenseMap) to translate between pointers and ids. + struct NodeAllocator { + // Amount of storage for a single node. + enum { NodeMemSize = 32 }; + NodeAllocator(uint32_t NPB = 4096) + : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)), + IndexMask((1 << BitsPerIndex)-1), ActiveEnd(nullptr) { + assert(isPowerOf2_32(NPB)); + } + NodeBase *ptr(NodeId N) const { + uint32_t N1 = N-1; + uint32_t BlockN = N1 >> BitsPerIndex; + uint32_t Offset = (N1 & IndexMask) * NodeMemSize; + return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset); + } + NodeId id(const NodeBase *P) const; + NodeAddr<NodeBase*> New(); + void clear(); + + private: + void startNewBlock(); + bool needNewBlock(); + uint32_t makeId(uint32_t Block, uint32_t Index) const { + // Add 1 to the id, to avoid the id of 0, which is treated as "null". + return ((Block << BitsPerIndex) | Index) + 1; + } + + const uint32_t NodesPerBlock; + const uint32_t BitsPerIndex; + const uint32_t IndexMask; + char *ActiveEnd; + std::vector<char*> Blocks; + typedef BumpPtrAllocatorImpl<MallocAllocator, 65536> AllocatorTy; + AllocatorTy MemPool; + }; + + struct RegisterRef { + unsigned Reg, Sub; + + // No non-trivial constructors, since this will be a member of a union. + RegisterRef() = default; + RegisterRef(const RegisterRef &RR) = default; + RegisterRef &operator= (const RegisterRef &RR) = default; + bool operator== (const RegisterRef &RR) const { + return Reg == RR.Reg && Sub == RR.Sub; + } + bool operator!= (const RegisterRef &RR) const { + return !operator==(RR); + } + bool operator< (const RegisterRef &RR) const { + return Reg < RR.Reg || (Reg == RR.Reg && Sub < RR.Sub); + } + }; + typedef std::set<RegisterRef> RegisterSet; + + struct RegisterAliasInfo { + RegisterAliasInfo(const TargetRegisterInfo &tri) : TRI(tri) {} + virtual ~RegisterAliasInfo() {} + + virtual std::vector<RegisterRef> getAliasSet(RegisterRef RR) const; + virtual bool alias(RegisterRef RA, RegisterRef RB) const; + virtual bool covers(RegisterRef RA, RegisterRef RB) const; + virtual bool covers(const RegisterSet &RRs, RegisterRef RR) const; + + const TargetRegisterInfo &TRI; + }; + + struct TargetOperandInfo { + TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {} + virtual ~TargetOperandInfo() {} + virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const; + virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const; + virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const; + + const TargetInstrInfo &TII; + }; + + + struct DataFlowGraph; + + struct NodeBase { + public: + // Make sure this is a POD. + NodeBase() = default; + uint16_t getType() const { return NodeAttrs::type(Attrs); } + uint16_t getKind() const { return NodeAttrs::kind(Attrs); } + uint16_t getFlags() const { return NodeAttrs::flags(Attrs); } + NodeId getNext() const { return Next; } + + uint16_t getAttrs() const { return Attrs; } + void setAttrs(uint16_t A) { Attrs = A; } + void setFlags(uint16_t F) { setAttrs(NodeAttrs::set_flags(getAttrs(), F)); } + + // Insert node NA after "this" in the circular chain. + void append(NodeAddr<NodeBase*> NA); + // Initialize all members to 0. + void init() { memset(this, 0, sizeof *this); } + void setNext(NodeId N) { Next = N; } + + protected: + uint16_t Attrs; + uint16_t Reserved; + NodeId Next; // Id of the next node in the circular chain. + // Definitions of nested types. 
Using anonymous nested structs would make + // this class definition clearer, but unnamed structs are not a part of + // the standard. + struct Def_struct { + NodeId DD, DU; // Ids of the first reached def and use. + }; + struct PhiU_struct { + NodeId PredB; // Id of the predecessor block for a phi use. + }; + struct Code_struct { + void *CP; // Pointer to the actual code. + NodeId FirstM, LastM; // Id of the first member and last. + }; + struct Ref_struct { + NodeId RD, Sib; // Ids of the reaching def and the sibling. + union { + Def_struct Def; + PhiU_struct PhiU; + }; + union { + MachineOperand *Op; // Non-phi refs point to a machine operand. + RegisterRef RR; // Phi refs store register info directly. + }; + }; + + // The actual payload. + union { + Ref_struct Ref; + Code_struct Code; + }; + }; + // The allocator allocates chunks of 32 bytes for each node. The fact that + // each node takes 32 bytes in memory is used for fast translation between + // the node id and the node address. + static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize, + "NodeBase must be at most NodeAllocator::NodeMemSize bytes"); + + typedef std::vector<NodeAddr<NodeBase*>> NodeList; + typedef std::set<NodeId> NodeSet; + + struct RefNode : public NodeBase { + RefNode() = default; + RegisterRef getRegRef() const; + MachineOperand &getOp() { + assert(!(getFlags() & NodeAttrs::PhiRef)); + return *Ref.Op; + } + void setRegRef(RegisterRef RR); + void setRegRef(MachineOperand *Op); + NodeId getReachingDef() const { + return Ref.RD; + } + void setReachingDef(NodeId RD) { + Ref.RD = RD; + } + NodeId getSibling() const { + return Ref.Sib; + } + void setSibling(NodeId Sib) { + Ref.Sib = Sib; + } + bool isUse() const { + assert(getType() == NodeAttrs::Ref); + return getKind() == NodeAttrs::Use; + } + bool isDef() const { + assert(getType() == NodeAttrs::Ref); + return getKind() == NodeAttrs::Def; + } + + template <typename Predicate> + NodeAddr<RefNode*> getNextRef(RegisterRef RR, Predicate P, bool NextOnly, + const DataFlowGraph &G); + NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G); + }; + + struct DefNode : public RefNode { + NodeId getReachedDef() const { + return Ref.Def.DD; + } + void setReachedDef(NodeId D) { + Ref.Def.DD = D; + } + NodeId getReachedUse() const { + return Ref.Def.DU; + } + void setReachedUse(NodeId U) { + Ref.Def.DU = U; + } + + void linkToDef(NodeId Self, NodeAddr<DefNode*> DA); + }; + + struct UseNode : public RefNode { + void linkToDef(NodeId Self, NodeAddr<DefNode*> DA); + }; + + struct PhiUseNode : public UseNode { + NodeId getPredecessor() const { + assert(getFlags() & NodeAttrs::PhiRef); + return Ref.PhiU.PredB; + } + void setPredecessor(NodeId B) { + assert(getFlags() & NodeAttrs::PhiRef); + Ref.PhiU.PredB = B; + } + }; + + struct CodeNode : public NodeBase { + template <typename T> T getCode() const { + return static_cast<T>(Code.CP); + } + void setCode(void *C) { + Code.CP = C; + } + + NodeAddr<NodeBase*> getFirstMember(const DataFlowGraph &G) const; + NodeAddr<NodeBase*> getLastMember(const DataFlowGraph &G) const; + void addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G); + void addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA, + const DataFlowGraph &G); + void removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G); + + NodeList members(const DataFlowGraph &G) const; + template <typename Predicate> + NodeList members_if(Predicate P, const DataFlowGraph &G) const; + }; + + struct InstrNode : public CodeNode { + NodeAddr<NodeBase*> getOwner(const 
DataFlowGraph &G); + }; + + struct PhiNode : public InstrNode { + MachineInstr *getCode() const { + return nullptr; + } + }; + + struct StmtNode : public InstrNode { + MachineInstr *getCode() const { + return CodeNode::getCode<MachineInstr*>(); + } + }; + + struct BlockNode : public CodeNode { + MachineBasicBlock *getCode() const { + return CodeNode::getCode<MachineBasicBlock*>(); + } + void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G); + }; + + struct FuncNode : public CodeNode { + MachineFunction *getCode() const { + return CodeNode::getCode<MachineFunction*>(); + } + NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB, + const DataFlowGraph &G) const; + NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G); + }; + + struct DataFlowGraph { + DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii, + const TargetRegisterInfo &tri, const MachineDominatorTree &mdt, + const MachineDominanceFrontier &mdf, const RegisterAliasInfo &rai, + const TargetOperandInfo &toi); + + NodeBase *ptr(NodeId N) const; + template <typename T> T ptr(NodeId N) const { + return static_cast<T>(ptr(N)); + } + NodeId id(const NodeBase *P) const; + + template <typename T> NodeAddr<T> addr(NodeId N) const { + return { ptr<T>(N), N }; + } + + NodeAddr<FuncNode*> getFunc() const { + return Func; + } + MachineFunction &getMF() const { + return MF; + } + const TargetInstrInfo &getTII() const { + return TII; + } + const TargetRegisterInfo &getTRI() const { + return TRI; + } + const MachineDominatorTree &getDT() const { + return MDT; + } + const MachineDominanceFrontier &getDF() const { + return MDF; + } + const RegisterAliasInfo &getRAI() const { + return RAI; + } + + struct DefStack { + DefStack() = default; + bool empty() const { return Stack.empty() || top() == bottom(); } + private: + typedef NodeAddr<DefNode*> value_type; + struct Iterator { + typedef DefStack::value_type value_type; + Iterator &up() { Pos = DS.nextUp(Pos); return *this; } + Iterator &down() { Pos = DS.nextDown(Pos); return *this; } + value_type operator*() const { + assert(Pos >= 1); + return DS.Stack[Pos-1]; + } + const value_type *operator->() const { + assert(Pos >= 1); + return &DS.Stack[Pos-1]; + } + bool operator==(const Iterator &It) const { return Pos == It.Pos; } + bool operator!=(const Iterator &It) const { return Pos != It.Pos; } + private: + Iterator(const DefStack &S, bool Top); + // Pos-1 is the index in the StorageType object that corresponds to + // the top of the DefStack. 
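+        // The stack may also hold delimiter entries: records with a null
+        // Addr whose Id identifies a basic block (see isDelimiter,
+        // start_block and clear_block below). nextUp and nextDown skip
+        // such entries, which appears to be why the iterator is stepped
+        // through them rather than by a simple +/-1 on Pos.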
+ const DefStack &DS; + unsigned Pos; + friend struct DefStack; + }; + public: + typedef Iterator iterator; + iterator top() const { return Iterator(*this, true); } + iterator bottom() const { return Iterator(*this, false); } + unsigned size() const; + + void push(NodeAddr<DefNode*> DA) { Stack.push_back(DA); } + void pop(); + void start_block(NodeId N); + void clear_block(NodeId N); + private: + friend struct Iterator; + typedef std::vector<value_type> StorageType; + bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const { + return (P.Addr == nullptr) && (N == 0 || P.Id == N); + } + unsigned nextUp(unsigned P) const; + unsigned nextDown(unsigned P) const; + StorageType Stack; + }; + + typedef std::map<RegisterRef,DefStack> DefStackMap; + + void build(); + void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM); + void markBlock(NodeId B, DefStackMap &DefM); + void releaseBlock(NodeId B, DefStackMap &DefM); + + NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA, + NodeAddr<RefNode*> RA) const; + NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA, + NodeAddr<RefNode*> RA, bool Create); + NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA, + NodeAddr<RefNode*> RA) const; + NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA, + NodeAddr<RefNode*> RA, bool Create); + NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA, + NodeAddr<RefNode*> RA) const; + + NodeList getRelatedRefs(NodeAddr<InstrNode*> IA, + NodeAddr<RefNode*> RA) const; + + void unlinkUse(NodeAddr<UseNode*> UA); + void unlinkDef(NodeAddr<DefNode*> DA); + + // Some useful filters. + template <uint16_t Kind> + static bool IsRef(const NodeAddr<NodeBase*> BA) { + return BA.Addr->getType() == NodeAttrs::Ref && + BA.Addr->getKind() == Kind; + } + template <uint16_t Kind> + static bool IsCode(const NodeAddr<NodeBase*> BA) { + return BA.Addr->getType() == NodeAttrs::Code && + BA.Addr->getKind() == Kind; + } + static bool IsDef(const NodeAddr<NodeBase*> BA) { + return BA.Addr->getType() == NodeAttrs::Ref && + BA.Addr->getKind() == NodeAttrs::Def; + } + static bool IsUse(const NodeAddr<NodeBase*> BA) { + return BA.Addr->getType() == NodeAttrs::Ref && + BA.Addr->getKind() == NodeAttrs::Use; + } + static bool IsPhi(const NodeAddr<NodeBase*> BA) { + return BA.Addr->getType() == NodeAttrs::Code && + BA.Addr->getKind() == NodeAttrs::Phi; + } + + private: + void reset(); + + NodeAddr<NodeBase*> newNode(uint16_t Attrs); + NodeAddr<NodeBase*> cloneNode(const NodeAddr<NodeBase*> B); + NodeAddr<UseNode*> newUse(NodeAddr<InstrNode*> Owner, + MachineOperand &Op, uint16_t Flags = NodeAttrs::None); + NodeAddr<PhiUseNode*> newPhiUse(NodeAddr<PhiNode*> Owner, + RegisterRef RR, NodeAddr<BlockNode*> PredB, + uint16_t Flags = NodeAttrs::PhiRef); + NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner, + MachineOperand &Op, uint16_t Flags = NodeAttrs::None); + NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner, + RegisterRef RR, uint16_t Flags = NodeAttrs::PhiRef); + NodeAddr<PhiNode*> newPhi(NodeAddr<BlockNode*> Owner); + NodeAddr<StmtNode*> newStmt(NodeAddr<BlockNode*> Owner, + MachineInstr *MI); + NodeAddr<BlockNode*> newBlock(NodeAddr<FuncNode*> Owner, + MachineBasicBlock *BB); + NodeAddr<FuncNode*> newFunc(MachineFunction *MF); + + template <typename Predicate> + std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>> + locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA, + Predicate P) const; + + typedef std::map<NodeId,RegisterSet> BlockRefsMap; + + void buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In); + 
void buildBlockRefs(NodeAddr<BlockNode*> BA, BlockRefsMap &RefM); + void recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM, + NodeAddr<BlockNode*> BA); + void buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM, + NodeAddr<BlockNode*> BA); + void removeUnusedPhis(); + + template <typename T> void linkRefUp(NodeAddr<InstrNode*> IA, + NodeAddr<T> TA, DefStack &DS); + void linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA); + void linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA); + + TimerGroup TimeG; + NodeAddr<FuncNode*> Func; + NodeAllocator Memory; + + MachineFunction &MF; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; + const MachineDominatorTree &MDT; + const MachineDominanceFrontier &MDF; + const RegisterAliasInfo &RAI; + const TargetOperandInfo &TOI; + }; // struct DataFlowGraph + + template <typename Predicate> + NodeAddr<RefNode*> RefNode::getNextRef(RegisterRef RR, Predicate P, + bool NextOnly, const DataFlowGraph &G) { + // Get the "Next" reference in the circular list that references RR and + // satisfies predicate "Pred". + auto NA = G.addr<NodeBase*>(getNext()); + + while (NA.Addr != this) { + if (NA.Addr->getType() == NodeAttrs::Ref) { + NodeAddr<RefNode*> RA = NA; + if (RA.Addr->getRegRef() == RR && P(NA)) + return NA; + if (NextOnly) + break; + NA = G.addr<NodeBase*>(NA.Addr->getNext()); + } else { + // We've hit the beginning of the chain. + assert(NA.Addr->getType() == NodeAttrs::Code); + NodeAddr<CodeNode*> CA = NA; + NA = CA.Addr->getFirstMember(G); + } + } + // Return the equivalent of "nullptr" if such a node was not found. + return NodeAddr<RefNode*>(); + } + + template <typename Predicate> + NodeList CodeNode::members_if(Predicate P, const DataFlowGraph &G) const { + NodeList MM; + auto M = getFirstMember(G); + if (M.Id == 0) + return MM; + + while (M.Addr != this) { + if (P(M)) + MM.push_back(M); + M = G.addr<NodeBase*>(M.Addr->getNext()); + } + return MM; + } + + + template <typename T> struct Print; + template <typename T> + raw_ostream &operator<< (raw_ostream &OS, const Print<T> &P); + + template <typename T> + struct Print { + Print(const T &x, const DataFlowGraph &g) : Obj(x), G(g) {} + const T &Obj; + const DataFlowGraph &G; + }; + + template <typename T> + struct PrintNode : Print<NodeAddr<T>> { + PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g) + : Print<NodeAddr<T>>(x, g) {} + }; +} // namespace rdf + +#endif // RDF_GRAPH_H diff --git a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp new file mode 100644 index 0000000..1d9bd37 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp @@ -0,0 +1,848 @@ +//===--- RDFLiveness.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Computation of the liveness information from the data-flow graph. +// +// The main functionality of this code is to compute block live-in +// information. With the live-in information in place, the placement +// of kill flags can also be recalculated. +// +// The block live-in calculation is based on the ideas from the following +// publication: +// +// Dibyendu Das, Ramakrishna Upadrasta, Benoit Dupont de Dinechin. +// "Efficient Liveness Computation Using Merge Sets and DJ-Graphs." 
+// ACM Transactions on Architecture and Code Optimization, Association for +// Computing Machinery, 2012, ACM TACO Special Issue on "High-Performance +// and Embedded Architectures and Compilers", 8 (4), +// <10.1145/2086696.2086706>. <hal-00647369> +// +#include "RDFGraph.h" +#include "RDFLiveness.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominanceFrontier.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; +using namespace rdf; + +namespace rdf { + template<> + raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) { + OS << '{'; + for (auto I : P.Obj) { + OS << ' ' << Print<RegisterRef>(I.first, P.G) << '{'; + for (auto J = I.second.begin(), E = I.second.end(); J != E; ) { + OS << Print<NodeId>(*J, P.G); + if (++J != E) + OS << ','; + } + OS << '}'; + } + OS << " }"; + return OS; + } +} + +// The order in the returned sequence is the order of reaching defs in the +// upward traversal: the first def is the closest to the given reference RefA, +// the next one is further up, and so on. +// The list ends at a reaching phi def, or when the reference from RefA is +// covered by the defs in the list (see FullChain). +// This function provides two modes of operation: +// (1) Returning the sequence of reaching defs for a particular reference +// node. This sequence will terminate at the first phi node [1]. +// (2) Returning a partial sequence of reaching defs, where the final goal +// is to traverse past phi nodes to the actual defs arising from the code +// itself. +// In mode (2), the register reference for which the search was started +// may be different from the reference node RefA, for which this call was +// made, hence the argument RefRR, which holds the original register. +// Also, some definitions may have already been encountered in a previous +// call that will influence register covering. The register references +// already defined are passed in through DefRRs. +// In mode (1), the "continuation" considerations do not apply, and the +// RefRR is the same as the register in RefA, and the set DefRRs is empty. +// +// [1] It is possible for multiple phi nodes to be included in the returned +// sequence: +// SubA = phi ... +// SubB = phi ... +// ... = SuperAB(rdef:SubA), SuperAB"(rdef:SubB) +// However, these phi nodes are independent from one another in terms of +// the data-flow. + +NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, + NodeAddr<RefNode*> RefA, bool FullChain, const RegisterSet &DefRRs) { + SetVector<NodeId> DefQ; + SetVector<NodeId> Owners; + + // The initial queue should not have reaching defs for shadows. The + // whole point of a shadow is that it will have a reaching def that + // is not aliased to the reaching defs of the related shadows. + NodeId Start = RefA.Id; + auto SNA = DFG.addr<RefNode*>(Start); + if (NodeId RD = SNA.Addr->getReachingDef()) + DefQ.insert(RD); + + // Collect all the reaching defs, going up until a phi node is encountered, + // or there are no more reaching defs. From this set, the actual set of + // reaching defs will be selected. + // The traversal upwards must go on until a covering def is encountered. + // It is possible that a collection of non-covering (individually) defs + // will be sufficient, but keep going until a covering one is found. 
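+  // Note that DefQ can grow while it is being processed: the index-based
+  // iteration below makes the SetVector act as a simple worklist (insert
+  // appends new ids at the end and rejects duplicates).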
+  for (unsigned i = 0; i < DefQ.size(); ++i) {
+    auto TA = DFG.addr<DefNode*>(DefQ[i]);
+    if (TA.Addr->getFlags() & NodeAttrs::PhiRef)
+      continue;
+    // Stop at the covering/overwriting def of the initial register reference.
+    RegisterRef RR = TA.Addr->getRegRef();
+    if (RAI.covers(RR, RefRR)) {
+      uint16_t Flags = TA.Addr->getFlags();
+      if (!(Flags & NodeAttrs::Preserving))
+        continue;
+    }
+    // Get the next level of reaching defs. This will include multiple
+    // reaching defs for shadows.
+    for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA))
+      if (auto RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
+        DefQ.insert(RD);
+  }
+
+  // Remove all non-phi defs that are not aliased to RefRR, and collect
+  // the owners of the remaining defs.
+  SetVector<NodeId> Defs;
+  for (auto N : DefQ) {
+    auto TA = DFG.addr<DefNode*>(N);
+    bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
+    if (!IsPhi && !RAI.alias(RefRR, TA.Addr->getRegRef()))
+      continue;
+    Defs.insert(TA.Id);
+    Owners.insert(TA.Addr->getOwner(DFG).Id);
+  }
+
+  // Return the MachineBasicBlock containing a given instruction.
+  auto Block = [this] (NodeAddr<InstrNode*> IA) -> MachineBasicBlock* {
+    if (IA.Addr->getKind() == NodeAttrs::Stmt)
+      return NodeAddr<StmtNode*>(IA).Addr->getCode()->getParent();
+    assert(IA.Addr->getKind() == NodeAttrs::Phi);
+    NodeAddr<PhiNode*> PA = IA;
+    NodeAddr<BlockNode*> BA = PA.Addr->getOwner(DFG);
+    return BA.Addr->getCode();
+  };
+  // Less(A,B) iff instruction A is further down in the dominator tree than B.
+  auto Less = [&Block,this] (NodeId A, NodeId B) -> bool {
+    if (A == B)
+      return false;
+    auto OA = DFG.addr<InstrNode*>(A), OB = DFG.addr<InstrNode*>(B);
+    MachineBasicBlock *BA = Block(OA), *BB = Block(OB);
+    if (BA != BB)
+      return MDT.dominates(BB, BA);
+    // They are in the same block.
+    bool StmtA = OA.Addr->getKind() == NodeAttrs::Stmt;
+    bool StmtB = OB.Addr->getKind() == NodeAttrs::Stmt;
+    if (StmtA) {
+      if (!StmtB)   // OB is a phi and phis dominate statements.
+        return true;
+      auto CA = NodeAddr<StmtNode*>(OA).Addr->getCode();
+      auto CB = NodeAddr<StmtNode*>(OB).Addr->getCode();
+      // The order must be linear, so tie-break such equalities.
+      if (CA == CB)
+        return A < B;
+      return MDT.dominates(CB, CA);
+    } else {
+      // OA is a phi.
+      if (StmtB)
+        return false;
+      // Both are phis. There is no ordering between phis (in terms of
+      // the data-flow), so tie-break this via node id comparison.
+      return A < B;
+    }
+  };
+
+  std::vector<NodeId> Tmp(Owners.begin(), Owners.end());
+  std::sort(Tmp.begin(), Tmp.end(), Less);
+
+  // The vector is a list of instructions, so that defs coming from
+  // the same instruction don't need to be artificially ordered.
+  // Then, when computing the initial segment, and iterating over an
+  // instruction, pick the defs that contribute to the covering (i.e. are
+  // not covered by previously added defs). Check the defs individually,
+  // i.e. first check whether each def is covered (without adding any to
+  // the tracking set), and then add all the selected ones.
+
+  // The reason for this is illustrated by this example:
+  //   *d1<A>, *d2<B>, ...  Assume A and B are aliased (can happen in phi nodes).
+  //   *d3<C>               If A \incl BuC, and B \incl AuC, then *d2 would be
+  //                        covered if we added A first, and A would be covered
+  //                        if we added B first.
+ + NodeList RDefs; + RegisterSet RRs = DefRRs; + + auto DefInSet = [&Defs] (NodeAddr<RefNode*> TA) -> bool { + return TA.Addr->getKind() == NodeAttrs::Def && + Defs.count(TA.Id); + }; + for (auto T : Tmp) { + if (!FullChain && RAI.covers(RRs, RefRR)) + break; + auto TA = DFG.addr<InstrNode*>(T); + bool IsPhi = DFG.IsCode<NodeAttrs::Phi>(TA); + NodeList Ds; + for (NodeAddr<DefNode*> DA : TA.Addr->members_if(DefInSet, DFG)) { + auto QR = DA.Addr->getRegRef(); + // Add phi defs even if they are covered by subsequent defs. This is + // for cases where the reached use is not covered by any of the defs + // encountered so far: the phi def is needed to expose the liveness + // of that use to the entry of the block. + // Example: + // phi d1<R3>(,d2,), ... Phi def d1 is covered by d2. + // d2<R3>(d1,,u3), ... + // ..., u3<D1>(d2) This use needs to be live on entry. + if (FullChain || IsPhi || !RAI.covers(RRs, QR)) + Ds.push_back(DA); + } + RDefs.insert(RDefs.end(), Ds.begin(), Ds.end()); + for (NodeAddr<DefNode*> DA : Ds) { + // When collecting a full chain of definitions, do not consider phi + // defs to actually define a register. + uint16_t Flags = DA.Addr->getFlags(); + if (!FullChain || !(Flags & NodeAttrs::PhiRef)) + if (!(Flags & NodeAttrs::Preserving)) + RRs.insert(DA.Addr->getRegRef()); + } + } + + return RDefs; +} + + +static const RegisterSet NoRegs; + +NodeList Liveness::getAllReachingDefs(NodeAddr<RefNode*> RefA) { + return getAllReachingDefs(RefA.Addr->getRegRef(), RefA, false, NoRegs); +} + + +void Liveness::computePhiInfo() { + NodeList Phis; + NodeAddr<FuncNode*> FA = DFG.getFunc(); + auto Blocks = FA.Addr->members(DFG); + for (NodeAddr<BlockNode*> BA : Blocks) { + auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG); + Phis.insert(Phis.end(), Ps.begin(), Ps.end()); + } + + // phi use -> (map: reaching phi -> set of registers defined in between) + std::map<NodeId,std::map<NodeId,RegisterSet>> PhiUp; + std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation. + + // Go over all phis. + for (NodeAddr<PhiNode*> PhiA : Phis) { + // Go over all defs and collect the reached uses that are non-phi uses + // (i.e. the "real uses"). + auto &RealUses = RealUseMap[PhiA.Id]; + auto PhiRefs = PhiA.Addr->members(DFG); + + // Have a work queue of defs whose reached uses need to be found. + // For each def, add to the queue all reached (non-phi) defs. + SetVector<NodeId> DefQ; + NodeSet PhiDefs; + for (auto R : PhiRefs) { + if (!DFG.IsRef<NodeAttrs::Def>(R)) + continue; + DefQ.insert(R.Id); + PhiDefs.insert(R.Id); + } + for (unsigned i = 0; i < DefQ.size(); ++i) { + NodeAddr<DefNode*> DA = DFG.addr<DefNode*>(DefQ[i]); + NodeId UN = DA.Addr->getReachedUse(); + while (UN != 0) { + NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN); + if (!(A.Addr->getFlags() & NodeAttrs::PhiRef)) + RealUses[getRestrictedRegRef(A)].insert(A.Id); + UN = A.Addr->getSibling(); + } + NodeId DN = DA.Addr->getReachedDef(); + while (DN != 0) { + NodeAddr<DefNode*> A = DFG.addr<DefNode*>(DN); + for (auto T : DFG.getRelatedRefs(A.Addr->getOwner(DFG), A)) { + uint16_t Flags = NodeAddr<DefNode*>(T).Addr->getFlags(); + // Must traverse the reached-def chain. Consider: + // def(D0) -> def(R0) -> def(R0) -> use(D0) + // The reachable use of D0 passes through a def of R0. + if (!(Flags & NodeAttrs::PhiRef)) + DefQ.insert(T.Id); + } + DN = A.Addr->getSibling(); + } + } + // Filter out these uses that appear to be reachable, but really + // are not. For example: + // + // R1:0 = d1 + // = R1:0 u2 Reached by d1. 
+    //   R0 = d3
+    //      = R1:0   u4   Still reached by d1: indirectly through
+    //                    the def d3.
+    //   R1 = d5
+    //      = R1:0   u6   Not reached by d1 (covered collectively
+    //                    by d3 and d5), but following reached
+    //                    defs and uses from d1 will lead here.
+    auto HasDef = [&PhiDefs] (NodeAddr<DefNode*> DA) -> bool {
+      return PhiDefs.count(DA.Id);
+    };
+    for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE; ) {
+      // For each reached register UI->first, there is a set UI->second, of
+      // uses of it. For each such use, check if it is reached by this phi,
+      // i.e. check if the set of its reaching uses intersects the set of
+      // this phi's defs.
+      auto &Uses = UI->second;
+      for (auto I = Uses.begin(), E = Uses.end(); I != E; ) {
+        auto UA = DFG.addr<UseNode*>(*I);
+        NodeList RDs = getAllReachingDefs(UI->first, UA);
+        if (std::any_of(RDs.begin(), RDs.end(), HasDef))
+          ++I;
+        else
+          I = Uses.erase(I);
+      }
+      if (Uses.empty())
+        UI = RealUses.erase(UI);
+      else
+        ++UI;
+    }
+
+    // If this phi reaches some "real" uses, add it to the queue for upward
+    // propagation.
+    if (!RealUses.empty())
+      PhiUQ.push_back(PhiA.Id);
+
+    // Go over all phi uses and check if the reaching def is another phi.
+    // Collect the phis that are among the reaching defs of these uses.
+    // While traversing the list of reaching defs for each phi use, collect
+    // the set of registers defined between this phi (PhiA) and the owner phi
+    // of the reaching def.
+    for (auto I : PhiRefs) {
+      if (!DFG.IsRef<NodeAttrs::Use>(I))
+        continue;
+      NodeAddr<UseNode*> UA = I;
+      auto &UpMap = PhiUp[UA.Id];
+      RegisterSet DefRRs;
+      for (NodeAddr<DefNode*> DA : getAllReachingDefs(UA)) {
+        if (DA.Addr->getFlags() & NodeAttrs::PhiRef)
+          UpMap[DA.Addr->getOwner(DFG).Id] = DefRRs;
+        else
+          DefRRs.insert(DA.Addr->getRegRef());
+      }
+    }
+  }
+
+  if (Trace) {
+    dbgs() << "Phi-up-to-phi map:\n";
+    for (auto I : PhiUp) {
+      dbgs() << "phi " << Print<NodeId>(I.first, DFG) << " -> {";
+      for (auto R : I.second)
+        dbgs() << ' ' << Print<NodeId>(R.first, DFG)
+               << Print<RegisterSet>(R.second, DFG);
+      dbgs() << " }\n";
+    }
+  }
+
+  // Propagate the reached registers up in the phi chain.
+  //
+  // The following type of situation needs careful handling:
+  //
+  //   phi d1<R1:0>  (1)
+  //        |
+  //   ... d2<R1>
+  //        |
+  //   phi u3<R1:0>  (2)
+  //        |
+  //   ... u4<R1>
+  //
+  // The phi node (2) defines a register pair R1:0, and reaches a "real"
+  // use u4 of just R1. The same phi node is also known to reach (upwards)
+  // the phi node (1). However, the use u4 is not reached by phi (1),
+  // because of the intervening definition d2 of R1. The data flow between
+  // phis (1) and (2) is restricted to R1:0 minus R1, i.e. R0.
+  //
+  // When propagating uses up the phi chains, get all the reaching defs
+  // for a given phi use, and traverse the list until the propagated ref
+  // is covered, or until reaching the final phi. Only assume that the
+  // reference reaches the phi in the latter case.
+
+  for (unsigned i = 0; i < PhiUQ.size(); ++i) {
+    auto PA = DFG.addr<PhiNode*>(PhiUQ[i]);
+    auto &RealUses = RealUseMap[PA.Id];
+    for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
+      NodeAddr<UseNode*> UA = U;
+      auto &UpPhis = PhiUp[UA.Id];
+      for (auto UP : UpPhis) {
+        bool Changed = false;
+        auto &MidDefs = UP.second;
+        // Collect the set UpReached of uses that are reached by the current
+        // phi PA, and are not covered by any intervening def between PA and
+        // the upward phi UP.
+ RegisterSet UpReached; + for (auto T : RealUses) { + if (!isRestricted(PA, UA, T.first)) + continue; + if (!RAI.covers(MidDefs, T.first)) + UpReached.insert(T.first); + } + if (UpReached.empty()) + continue; + // Update the set PRUs of real uses reached by the upward phi UP with + // the actual set of uses (UpReached) that the UP phi reaches. + auto &PRUs = RealUseMap[UP.first]; + for (auto R : UpReached) { + unsigned Z = PRUs[R].size(); + PRUs[R].insert(RealUses[R].begin(), RealUses[R].end()); + Changed |= (PRUs[R].size() != Z); + } + if (Changed) + PhiUQ.push_back(UP.first); + } + } + } + + if (Trace) { + dbgs() << "Real use map:\n"; + for (auto I : RealUseMap) { + dbgs() << "phi " << Print<NodeId>(I.first, DFG); + NodeAddr<PhiNode*> PA = DFG.addr<PhiNode*>(I.first); + NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG); + if (!Ds.empty()) { + RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef(); + dbgs() << '<' << Print<RegisterRef>(RR, DFG) << '>'; + } else { + dbgs() << "<noreg>"; + } + dbgs() << " -> " << Print<RefMap>(I.second, DFG) << '\n'; + } + } +} + + +void Liveness::computeLiveIns() { + // Populate the node-to-block map. This speeds up the calculations + // significantly. + NBMap.clear(); + for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) { + MachineBasicBlock *BB = BA.Addr->getCode(); + for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) { + for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG)) + NBMap.insert(std::make_pair(RA.Id, BB)); + NBMap.insert(std::make_pair(IA.Id, BB)); + } + } + + MachineFunction &MF = DFG.getMF(); + + // Compute IDF first, then the inverse. + decltype(IIDF) IDF; + for (auto &B : MF) { + auto F1 = MDF.find(&B); + if (F1 == MDF.end()) + continue; + SetVector<MachineBasicBlock*> IDFB(F1->second.begin(), F1->second.end()); + for (unsigned i = 0; i < IDFB.size(); ++i) { + auto F2 = MDF.find(IDFB[i]); + if (F2 != MDF.end()) + IDFB.insert(F2->second.begin(), F2->second.end()); + } + // Add B to the IDF(B). This will put B in the IIDF(B). + IDFB.insert(&B); + IDF[&B].insert(IDFB.begin(), IDFB.end()); + } + + for (auto I : IDF) + for (auto S : I.second) + IIDF[S].insert(I.first); + + computePhiInfo(); + + NodeAddr<FuncNode*> FA = DFG.getFunc(); + auto Blocks = FA.Addr->members(DFG); + + // Build the phi live-on-entry map. + for (NodeAddr<BlockNode*> BA : Blocks) { + MachineBasicBlock *MB = BA.Addr->getCode(); + auto &LON = PhiLON[MB]; + for (auto P : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG)) + for (auto S : RealUseMap[P.Id]) + LON[S.first].insert(S.second.begin(), S.second.end()); + } + + if (Trace) { + dbgs() << "Phi live-on-entry map:\n"; + for (auto I : PhiLON) + dbgs() << "block #" << I.first->getNumber() << " -> " + << Print<RefMap>(I.second, DFG) << '\n'; + } + + // Build the phi live-on-exit map. Each phi node has some set of reached + // "real" uses. Propagate this set backwards into the block predecessors + // through the reaching defs of the corresponding phi uses. + for (NodeAddr<BlockNode*> BA : Blocks) { + auto Phis = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG); + for (NodeAddr<PhiNode*> PA : Phis) { + auto &RUs = RealUseMap[PA.Id]; + if (RUs.empty()) + continue; + + for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) { + NodeAddr<PhiUseNode*> UA = U; + if (UA.Addr->getReachingDef() == 0) + continue; + + // Mark all reached "real" uses of P as live on exit in the + // predecessor. + // Remap all the RUs so that they have a correct reaching def. 
+ auto PrA = DFG.addr<BlockNode*>(UA.Addr->getPredecessor()); + auto &LOX = PhiLOX[PrA.Addr->getCode()]; + for (auto R : RUs) { + RegisterRef RR = R.first; + if (!isRestricted(PA, UA, RR)) + RR = getRestrictedRegRef(UA); + // The restricted ref may be different from the ref that was + // accessed in the "real use". This means that this phi use + // is not the one that carries this reference, so skip it. + if (!RAI.alias(R.first, RR)) + continue; + for (auto D : getAllReachingDefs(RR, UA)) + LOX[RR].insert(D.Id); + } + } // for U : phi uses + } // for P : Phis + } // for B : Blocks + + if (Trace) { + dbgs() << "Phi live-on-exit map:\n"; + for (auto I : PhiLOX) + dbgs() << "block #" << I.first->getNumber() << " -> " + << Print<RefMap>(I.second, DFG) << '\n'; + } + + RefMap LiveIn; + traverse(&MF.front(), LiveIn); + + // Add function live-ins to the live-in set of the function entry block. + auto &EntryIn = LiveMap[&MF.front()]; + for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) + EntryIn.insert({I->first,0}); + + if (Trace) { + // Dump the liveness map + for (auto &B : MF) { + BitVector LV(TRI.getNumRegs()); + for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I) + LV.set(I->PhysReg); + dbgs() << "BB#" << B.getNumber() << "\t rec = {"; + for (int x = LV.find_first(); x >= 0; x = LV.find_next(x)) + dbgs() << ' ' << Print<RegisterRef>({unsigned(x),0}, DFG); + dbgs() << " }\n"; + dbgs() << "\tcomp = " << Print<RegisterSet>(LiveMap[&B], DFG) << '\n'; + } + } +} + + +void Liveness::resetLiveIns() { + for (auto &B : DFG.getMF()) { + // Remove all live-ins. + std::vector<unsigned> T; + for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I) + T.push_back(I->PhysReg); + for (auto I : T) + B.removeLiveIn(I); + // Add the newly computed live-ins. + auto &LiveIns = LiveMap[&B]; + for (auto I : LiveIns) { + assert(I.Sub == 0); + B.addLiveIn(I.Reg); + } + } +} + + +void Liveness::resetKills() { + for (auto &B : DFG.getMF()) + resetKills(&B); +} + + +void Liveness::resetKills(MachineBasicBlock *B) { + auto CopyLiveIns = [] (MachineBasicBlock *B, BitVector &LV) -> void { + for (auto I = B->livein_begin(), E = B->livein_end(); I != E; ++I) + LV.set(I->PhysReg); + }; + + BitVector LiveIn(TRI.getNumRegs()), Live(TRI.getNumRegs()); + CopyLiveIns(B, LiveIn); + for (auto SI : B->successors()) + CopyLiveIns(SI, Live); + + for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) { + MachineInstr *MI = &*I; + if (MI->isDebugValue()) + continue; + + MI->clearKillInfo(); + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(R)) + continue; + for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) + Live.reset(*SR); + } + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(R)) + continue; + bool IsLive = false; + for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) { + if (!Live[*SR]) + continue; + IsLive = true; + break; + } + if (IsLive) + continue; + Op.setIsKill(true); + for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) + Live.set(*SR); + } + } +} + + +// For shadows, determine if RR is aliased to a reaching def of any other +// shadow associated with RA. If it is not, then RR is "restricted" to RA, +// and so it can be considered a value specific to RA. This is important +// for accurately determining values associated with phi uses. 
+// For non-shadows, this function returns "true". +bool Liveness::isRestricted(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA, + RegisterRef RR) const { + NodeId Start = RA.Id; + for (NodeAddr<RefNode*> TA = DFG.getNextShadow(IA, RA); + TA.Id != 0 && TA.Id != Start; TA = DFG.getNextShadow(IA, TA)) { + NodeId RD = TA.Addr->getReachingDef(); + if (RD == 0) + continue; + if (RAI.alias(RR, DFG.addr<DefNode*>(RD).Addr->getRegRef())) + return false; + } + return true; +} + + +RegisterRef Liveness::getRestrictedRegRef(NodeAddr<RefNode*> RA) const { + assert(DFG.IsRef<NodeAttrs::Use>(RA)); + if (RA.Addr->getFlags() & NodeAttrs::Shadow) { + NodeId RD = RA.Addr->getReachingDef(); + assert(RD); + RA = DFG.addr<DefNode*>(RD); + } + return RA.Addr->getRegRef(); +} + + +unsigned Liveness::getPhysReg(RegisterRef RR) const { + if (!TargetRegisterInfo::isPhysicalRegister(RR.Reg)) + return 0; + return RR.Sub ? TRI.getSubReg(RR.Reg, RR.Sub) : RR.Reg; +} + + +// Helper function to obtain the basic block containing the reaching def +// of the given use. +MachineBasicBlock *Liveness::getBlockWithRef(NodeId RN) const { + auto F = NBMap.find(RN); + if (F != NBMap.end()) + return F->second; + llvm_unreachable("Node id not in map"); +} + + +void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { + // The LiveIn map, for each (physical) register, contains the set of live + // reaching defs of that register that are live on entry to the associated + // block. + + // The summary of the traversal algorithm: + // + // R is live-in in B, if there exists a U(R), such that rdef(R) dom B + // and (U \in IDF(B) or B dom U). + // + // for (C : children) { + // LU = {} + // traverse(C, LU) + // LiveUses += LU + // } + // + // LiveUses -= Defs(B); + // LiveUses += UpwardExposedUses(B); + // for (C : IIDF[B]) + // for (U : LiveUses) + // if (Rdef(U) dom C) + // C.addLiveIn(U) + // + + // Go up the dominator tree (depth-first). + MachineDomTreeNode *N = MDT.getNode(B); + for (auto I : *N) { + RefMap L; + MachineBasicBlock *SB = I->getBlock(); + traverse(SB, L); + + for (auto S : L) + LiveIn[S.first].insert(S.second.begin(), S.second.end()); + } + + if (Trace) { + dbgs() << LLVM_FUNCTION_NAME << " in BB#" << B->getNumber() + << " after recursion into"; + for (auto I : *N) + dbgs() << ' ' << I->getBlock()->getNumber(); + dbgs() << "\n LiveIn: " << Print<RefMap>(LiveIn, DFG); + dbgs() << "\n Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n'; + } + + // Add phi uses that are live on exit from this block. + RefMap &PUs = PhiLOX[B]; + for (auto S : PUs) + LiveIn[S.first].insert(S.second.begin(), S.second.end()); + + if (Trace) { + dbgs() << "after LOX\n"; + dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; + dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n'; + } + + // Stop tracking all uses defined in this block: erase those records + // where the reaching def is located in B and which cover all reached + // uses. + auto Copy = LiveIn; + LiveIn.clear(); + + for (auto I : Copy) { + auto &Defs = LiveIn[I.first]; + NodeSet Rest; + for (auto R : I.second) { + auto DA = DFG.addr<DefNode*>(R); + RegisterRef DDR = DA.Addr->getRegRef(); + NodeAddr<InstrNode*> IA = DA.Addr->getOwner(DFG); + NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG); + // Defs from a different block need to be preserved. Defs from this + // block will need to be processed further, except for phi defs, the + // liveness of which is handled through the PhiLON/PhiLOX maps. 
+ if (B != BA.Addr->getCode()) + Defs.insert(R); + else { + bool IsPreserving = DA.Addr->getFlags() & NodeAttrs::Preserving; + if (IA.Addr->getKind() != NodeAttrs::Phi && !IsPreserving) { + bool Covering = RAI.covers(DDR, I.first); + NodeId U = DA.Addr->getReachedUse(); + while (U && Covering) { + auto DUA = DFG.addr<UseNode*>(U); + RegisterRef Q = DUA.Addr->getRegRef(); + Covering = RAI.covers(DA.Addr->getRegRef(), Q); + U = DUA.Addr->getSibling(); + } + if (!Covering) + Rest.insert(R); + } + } + } + + // Non-covering defs from B. + for (auto R : Rest) { + auto DA = DFG.addr<DefNode*>(R); + RegisterRef DRR = DA.Addr->getRegRef(); + RegisterSet RRs; + for (NodeAddr<DefNode*> TA : getAllReachingDefs(DA)) { + NodeAddr<InstrNode*> IA = TA.Addr->getOwner(DFG); + NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG); + // Preserving defs do not count towards covering. + if (!(TA.Addr->getFlags() & NodeAttrs::Preserving)) + RRs.insert(TA.Addr->getRegRef()); + if (BA.Addr->getCode() == B) + continue; + if (RAI.covers(RRs, DRR)) + break; + Defs.insert(TA.Id); + } + } + } + + emptify(LiveIn); + + if (Trace) { + dbgs() << "after defs in block\n"; + dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; + dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n'; + } + + // Scan the block for upward-exposed uses and add them to the tracking set. + for (auto I : DFG.getFunc().Addr->findBlock(B, DFG).Addr->members(DFG)) { + NodeAddr<InstrNode*> IA = I; + if (IA.Addr->getKind() != NodeAttrs::Stmt) + continue; + for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) { + RegisterRef RR = UA.Addr->getRegRef(); + for (auto D : getAllReachingDefs(UA)) + if (getBlockWithRef(D.Id) != B) + LiveIn[RR].insert(D.Id); + } + } + + if (Trace) { + dbgs() << "after uses in block\n"; + dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; + dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n'; + } + + // Phi uses should not be propagated up the dominator tree, since they + // are not dominated by their corresponding reaching defs. + auto &Local = LiveMap[B]; + auto &LON = PhiLON[B]; + for (auto R : LON) + Local.insert(R.first); + + if (Trace) { + dbgs() << "after phi uses in block\n"; + dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; + dbgs() << " Local: " << Print<RegisterSet>(Local, DFG) << '\n'; + } + + for (auto C : IIDF[B]) { + auto &LiveC = LiveMap[C]; + for (auto S : LiveIn) + for (auto R : S.second) + if (MDT.properlyDominates(getBlockWithRef(R), C)) + LiveC.insert(S.first); + } +} + + +void Liveness::emptify(RefMap &M) { + for (auto I = M.begin(), E = M.end(); I != E; ) + I = I->second.empty() ? M.erase(I) : std::next(I); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h new file mode 100644 index 0000000..4c1e8f3 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h @@ -0,0 +1,106 @@ +//===--- RDFLiveness.h ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Recalculate the liveness information given a data flow graph. +// This includes block live-ins and kill flags. 
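+//
+// A minimal usage sketch (the driver function below is hypothetical, but
+// the constructors and methods match the declarations in this header and
+// in RDFGraph.h):
+//
+//   void recomputeLiveness(MachineFunction &MF, const TargetInstrInfo &TII,
+//         const TargetRegisterInfo &TRI, const MachineDominatorTree &MDT,
+//         const MachineDominanceFrontier &MDF) {
+//     RegisterAliasInfo RAI(TRI);      // default alias/covering queries
+//     TargetOperandInfo TOI(TII);      // default operand classification
+//     DataFlowGraph G(MF, TII, TRI, MDT, MDF, RAI, TOI);
+//     G.build();                       // construct the def-use graph
+//     Liveness LV(MF.getRegInfo(), G);
+//     LV.computeLiveIns();             // compute per-block live-in sets
+//     LV.resetLiveIns();               // rewrite MBB live-in lists
+//     LV.resetKills();                 // recompute kill flags
+//   }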
+ +#ifndef RDF_LIVENESS_H +#define RDF_LIVENESS_H + +#include "RDFGraph.h" +#include "llvm/ADT/DenseMap.h" +#include <map> + +using namespace llvm; + +namespace llvm { + class MachineBasicBlock; + class MachineFunction; + class MachineRegisterInfo; + class TargetRegisterInfo; + class MachineDominatorTree; + class MachineDominanceFrontier; +} + +namespace rdf { + struct Liveness { + public: + typedef std::map<MachineBasicBlock*,RegisterSet> LiveMapType; + typedef std::map<RegisterRef,NodeSet> RefMap; + + Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g) + : DFG(g), TRI(g.getTRI()), MDT(g.getDT()), MDF(g.getDF()), + RAI(g.getRAI()), MRI(mri), Empty(), Trace(false) {} + + NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA, + bool FullChain = false, const RegisterSet &DefRRs = RegisterSet()); + NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA); + + LiveMapType &getLiveMap() { return LiveMap; } + const LiveMapType &getLiveMap() const { return LiveMap; } + const RefMap &getRealUses(NodeId P) const { + auto F = RealUseMap.find(P); + return F == RealUseMap.end() ? Empty : F->second; + } + + void computePhiInfo(); + void computeLiveIns(); + void resetLiveIns(); + void resetKills(); + void resetKills(MachineBasicBlock *B); + + void trace(bool T) { Trace = T; } + + private: + const DataFlowGraph &DFG; + const TargetRegisterInfo &TRI; + const MachineDominatorTree &MDT; + const MachineDominanceFrontier &MDF; + const RegisterAliasInfo &RAI; + MachineRegisterInfo &MRI; + LiveMapType LiveMap; + const RefMap Empty; + bool Trace; + + // Cache of mapping from node ids (for RefNodes) to the containing + // basic blocks. Not computing it each time for each node reduces + // the liveness calculation time by a large fraction. + typedef DenseMap<NodeId,MachineBasicBlock*> NodeBlockMap; + NodeBlockMap NBMap; + + // Phi information: + // + // map: NodeId -> (map: RegisterRef -> NodeSet) + // phi id -> (map: register -> set of reached non-phi uses) + std::map<NodeId, RefMap> RealUseMap; + + // Inverse iterated dominance frontier. + std::map<MachineBasicBlock*,std::set<MachineBasicBlock*>> IIDF; + + // Live on entry. + std::map<MachineBasicBlock*,RefMap> PhiLON; + + // Phi uses are considered to be located at the end of the block that + // they are associated with. The reaching def of a phi use dominates the + // block that the use corresponds to, but not the block that contains + // the phi itself. To include these uses in the liveness propagation (up + // the dominator tree), create a map: block -> set of uses live on exit. 
+ std::map<MachineBasicBlock*,RefMap> PhiLOX; + + bool isRestricted(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA, + RegisterRef RR) const; + RegisterRef getRestrictedRegRef(NodeAddr<RefNode*> RA) const; + unsigned getPhysReg(RegisterRef RR) const; + MachineBasicBlock *getBlockWithRef(NodeId RN) const; + void traverse(MachineBasicBlock *B, RefMap &LiveIn); + void emptify(RefMap &M); + }; +} + +#endif // RDF_LIVENESS_H diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp index 6756c17..5680130 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -277,8 +277,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); @@ -327,6 +325,8 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::BR_CC, MVT::i64, Expand); setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); @@ -872,7 +872,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return lowerJumpTable(Op, DAG); case ISD::SELECT: return lowerSELECT(Op, DAG); - case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG); case ISD::SETCC: return lowerSETCC(Op, DAG); case ISD::VASTART: return lowerVASTART(Op, DAG); case ISD::VAARG: return lowerVAARG(Op, DAG); @@ -1648,20 +1647,6 @@ lowerSELECT(SDValue Op, SelectionDAG &DAG) const SDLoc(Op)); } -SDValue MipsTargetLowering:: -lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const -{ - SDLoc DL(Op); - EVT Ty = Op.getOperand(0).getValueType(); - SDValue Cond = - DAG.getNode(ISD::SETCC, DL, getSetCCResultType(DAG.getDataLayout(), - *DAG.getContext(), Ty), - Op.getOperand(0), Op.getOperand(1), Op.getOperand(4)); - - return DAG.getNode(ISD::SELECT, DL, Op.getValueType(), Cond, Op.getOperand(2), - Op.getOperand(3)); -} - SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const { assert(!Subtarget.hasMips32r6() && !Subtarget.hasMips64r6()); SDValue Cond = createFPCmp(DAG, Op); diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h index b33e125..0dc683e 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h @@ -430,7 +430,6 @@ namespace llvm { SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const; diff --git 
a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td index d9fb8c8..ffda491 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -1003,7 +1003,7 @@ class IndirectBranch<string opstr, RegisterOperand RO> : JumpFR<opstr, RO> { let isCall=1, hasDelaySlot=1, Defs = [RA] in { class JumpLink<string opstr, DAGOperand opnd> : InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"), - [(MipsJmpLink imm:$target)], II_JAL, FrmJ, opstr> { + [(MipsJmpLink tglobaladdr:$target)], II_JAL, FrmJ, opstr> { let DecoderMethod = "DecodeJumpTarget"; } @@ -2075,8 +2075,6 @@ def : MipsPat<(MipsSync (i32 immz)), (SYNC 0)>, ISA_MIPS2; // Call -def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)), - (JAL tglobaladdr:$dst)>; def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)), (JAL texternalsym:$dst)>; //def : MipsPat<(MipsJmpLink GPR32:$dst), diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp index e6f7fe9..d4aeaf9 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -544,8 +544,6 @@ void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB, const MachineOperand &SrcLo = I->getOperand(1), &SrcHi = I->getOperand(2); MachineInstrBuilder LoInst = BuildMI(MBB, I, DL, get(LoOpc)); MachineInstrBuilder HiInst = BuildMI(MBB, I, DL, get(HiOpc)); - LoInst.addReg(SrcLo.getReg(), getKillRegState(SrcLo.isKill())); - HiInst.addReg(SrcHi.getReg(), getKillRegState(SrcHi.isKill())); // Add lo/hi registers if the mtlo/hi instructions created have explicit // def registers. @@ -556,6 +554,9 @@ void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB, LoInst.addReg(DstLo, RegState::Define); HiInst.addReg(DstHi, RegState::Define); } + + LoInst.addReg(SrcLo.getReg(), getKillRegState(SrcLo.isKill())); + HiInst.addReg(SrcHi.getReg(), getKillRegState(SrcHi.isKill())); } void MipsSEInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB, diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 7663696..be735f6 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -4549,6 +4549,7 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { delete static_cast<NVPTXSection *>(DwarfLocSection); delete static_cast<NVPTXSection *>(DwarfARangesSection); delete static_cast<NVPTXSection *>(DwarfRangesSection); + delete static_cast<NVPTXSection *>(DwarfMacinfoSection); } MCSection * diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h index 0f88ddf..683b9a3 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -41,6 +41,7 @@ public: DwarfLocSection = nullptr; DwarfARangesSection = nullptr; DwarfRangesSection = nullptr; + DwarfMacinfoSection = nullptr; } virtual ~NVPTXTargetObjectFile(); @@ -81,6 +82,8 @@ public: new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfMacinfoSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); } MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp 
index 9a63c14..ec354c2 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1092,8 +1092,28 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { } // ELFv2 ABI - Normal entry label. - if (Subtarget->isELFv2ABI()) + if (Subtarget->isELFv2ABI()) { + // In the Large code model, we allow arbitrary displacements between + // the text section and its associated TOC section. We place the + // full 8-byte offset to the TOC in memory immediately preceding + // the function global entry point. + if (TM.getCodeModel() == CodeModel::Large + && !MF->getRegInfo().use_empty(PPC::X2)) { + const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>(); + + MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC.")); + MCSymbol *GlobalEPSymbol = PPCFI->getGlobalEPSymbol(); + const MCExpr *TOCDeltaExpr = + MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext), + MCSymbolRefExpr::create(GlobalEPSymbol, + OutContext), + OutContext); + + OutStreamer->EmitLabel(PPCFI->getTOCOffsetSymbol()); + OutStreamer->EmitValue(TOCDeltaExpr, 8); + } return AsmPrinter::EmitFunctionEntryLabel(); + } // Emit an official procedure descriptor. MCSectionSubPair Current = OutStreamer->getCurrentSection(); @@ -1160,10 +1180,25 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() { // thus emit a prefix sequence along the following lines: // // func: + // .Lfunc_gepNN: + // # global entry point + // addis r2,r12,(.TOC.-.Lfunc_gepNN)@ha + // addi r2,r2,(.TOC.-.Lfunc_gepNN)@l + // .Lfunc_lepNN: + // .localentry func, .Lfunc_lepNN-.Lfunc_gepNN + // # local entry point, followed by function body + // + // For the Large code model, we create + // + // .Lfunc_tocNN: + // .quad .TOC.-.Lfunc_gepNN # done by EmitFunctionEntryLabel + // func: + // .Lfunc_gepNN: // # global entry point - // addis r2,r12,(.TOC.-func)@ha - // addi r2,r2,(.TOC.-func)@l - // .localentry func, .-func + // ld r2,.Lfunc_tocNN-.Lfunc_gepNN(r12) + // add r2,r2,r12 + // .Lfunc_lepNN: + // .localentry func, .Lfunc_lepNN-.Lfunc_gepNN // # local entry point, followed by function body // // This ensures we have r2 set up correctly while executing the function @@ -1171,32 +1206,49 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() { if (Subtarget->isELFv2ABI() // Only do all that if the function uses r2 in the first place.
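// (PPC::X2 is r2, the TOC pointer register in the ELFv2 ABI.)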
&& !MF->getRegInfo().use_empty(PPC::X2)) { + const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>(); - MCSymbol *GlobalEntryLabel = OutContext.createTempSymbol(); + MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol(); OutStreamer->EmitLabel(GlobalEntryLabel); const MCSymbolRefExpr *GlobalEntryLabelExp = MCSymbolRefExpr::create(GlobalEntryLabel, OutContext); - MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC.")); - const MCExpr *TOCDeltaExpr = - MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext), - GlobalEntryLabelExp, OutContext); + if (TM.getCodeModel() != CodeModel::Large) { + MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC.")); + const MCExpr *TOCDeltaExpr = + MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext), + GlobalEntryLabelExp, OutContext); - const MCExpr *TOCDeltaHi = - PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS) - .addReg(PPC::X2) - .addReg(PPC::X12) - .addExpr(TOCDeltaHi)); - - const MCExpr *TOCDeltaLo = - PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI) - .addReg(PPC::X2) - .addReg(PPC::X2) - .addExpr(TOCDeltaLo)); - - MCSymbol *LocalEntryLabel = OutContext.createTempSymbol(); + const MCExpr *TOCDeltaHi = + PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS) + .addReg(PPC::X2) + .addReg(PPC::X12) + .addExpr(TOCDeltaHi)); + + const MCExpr *TOCDeltaLo = + PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI) + .addReg(PPC::X2) + .addReg(PPC::X2) + .addExpr(TOCDeltaLo)); + } else { + MCSymbol *TOCOffset = PPCFI->getTOCOffsetSymbol(); + const MCExpr *TOCOffsetDeltaExpr = + MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCOffset, OutContext), + GlobalEntryLabelExp, OutContext); + + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD) + .addReg(PPC::X2) + .addExpr(TOCOffsetDeltaExpr) + .addReg(PPC::X12)); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADD8) + .addReg(PPC::X2) + .addReg(PPC::X2) + .addReg(PPC::X12)); + } + + MCSymbol *LocalEntryLabel = PPCFI->getLocalEPSymbol(); OutStreamer->EmitLabel(LocalEntryLabel); const MCSymbolRefExpr *LocalEntryLabelExp = MCSymbolRefExpr::create(LocalEntryLabel, OutContext); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 075e093..79e4fe3 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -299,22 +299,35 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), // 64-bit CR instructions let Interpretation64Bit = 1, isCodeGenOnly = 1 in { let hasSideEffects = 0 in { +// mtocrf's input needs to be prepared by shifting by an amount dependent +// on the cr register selected. Thus, post-ra anti-dep breaking must not +// later change that register assignment. +let hasExtraDefRegAllocReq = 1 in { def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST), "mtocrf $FXM, $ST", IIC_BrMCRX>, PPC970_DGroup_First, PPC970_Unit_CRU; +// Similarly to mtocrf, the mask for mtcrf must be prepared in a way that +// is dependent on the cr fields being set. def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS), "mtcrf $FXM, $rS", IIC_BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; +} // hasExtraDefRegAllocReq = 1 -let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking. 
+// mfocrf's input needs to be prepared by shifting by an amount dependent +// on the cr register selected. Thus, post-ra anti-dep breaking must not +// later change that register assignment. +let hasExtraSrcRegAllocReq = 1 in { def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", IIC_SprMFCRF>, PPC970_DGroup_First, PPC970_Unit_CRU; +// Similarly to mfocrf, the mask for mfcrf must be prepared in a way that +// is dependent on the cr fields being copied. def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins), "mfcr $rT", IIC_SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; +} // hasExtraSrcRegAllocReq = 1 } // hasSideEffects = 0 let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index c17603a..dcff6ad 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -744,20 +744,43 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, "isel is for regular integer GPRs only"); unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL; - unsigned SelectPred = Cond[0].getImm(); + auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm()); unsigned SubIdx; bool SwapOps; switch (SelectPred) { - default: llvm_unreachable("invalid predicate for isel"); - case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break; - case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break; - case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break; - case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break; - case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break; - case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break; - case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break; - case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break; + case PPC::PRED_EQ: + case PPC::PRED_EQ_MINUS: + case PPC::PRED_EQ_PLUS: + SubIdx = PPC::sub_eq; SwapOps = false; break; + case PPC::PRED_NE: + case PPC::PRED_NE_MINUS: + case PPC::PRED_NE_PLUS: + SubIdx = PPC::sub_eq; SwapOps = true; break; + case PPC::PRED_LT: + case PPC::PRED_LT_MINUS: + case PPC::PRED_LT_PLUS: + SubIdx = PPC::sub_lt; SwapOps = false; break; + case PPC::PRED_GE: + case PPC::PRED_GE_MINUS: + case PPC::PRED_GE_PLUS: + SubIdx = PPC::sub_lt; SwapOps = true; break; + case PPC::PRED_GT: + case PPC::PRED_GT_MINUS: + case PPC::PRED_GT_PLUS: + SubIdx = PPC::sub_gt; SwapOps = false; break; + case PPC::PRED_LE: + case PPC::PRED_LE_MINUS: + case PPC::PRED_LE_PLUS: + SubIdx = PPC::sub_gt; SwapOps = true; break; + case PPC::PRED_UN: + case PPC::PRED_UN_MINUS: + case PPC::PRED_UN_PLUS: + SubIdx = PPC::sub_un; SwapOps = false; break; + case PPC::PRED_NU: + case PPC::PRED_NU_MINUS: + case PPC::PRED_NU_PLUS: + SubIdx = PPC::sub_un; SwapOps = true; break; case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break; case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 6c4364a..ce0f9e6 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -2299,22 +2299,35 @@ def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), "#RESTORE_VRSAVE", []>; let hasSideEffects = 0 in { +// mtocrf's input needs to be prepared by shifting by an amount dependent +// on the cr register selected. 
Thus, post-ra anti-dep breaking must not +// later change that register assignment. +let hasExtraDefRegAllocReq = 1 in { def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST), "mtocrf $FXM, $ST", IIC_BrMCRX>, PPC970_DGroup_First, PPC970_Unit_CRU; +// Similarly to mtocrf, the mask for mtcrf must be prepared in a way that +// is dependent on the cr fields being set. def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS), "mtcrf $FXM, $rS", IIC_BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; +} // hasExtraDefRegAllocReq = 1 -let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking. +// mfocrf's input needs to be prepared by shifting by an amount dependent +// on the cr register selected. Thus, post-ra anti-dep breaking must not +// later change that register assignment. +let hasExtraSrcRegAllocReq = 1 in { def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", IIC_SprMFCRF>, PPC970_DGroup_First, PPC970_Unit_CRU; +// Similarly to mfocrf, the mask for mfcrf must be prepared in a way that +// is dependent on the cr fields being copied. def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins), "mfcr $rT", IIC_SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; +} // hasExtraSrcRegAllocReq = 1 } // hasSideEffects = 0 // Pseudo instruction to perform FADD in round-to-zero mode. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp index 95f1631..9d91e31 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp @@ -23,3 +23,24 @@ MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const { Twine(MF.getFunctionNumber()) + "$poff"); } + +MCSymbol *PPCFunctionInfo::getGlobalEPSymbol() const { + const DataLayout &DL = MF.getDataLayout(); + return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + "func_gep" + + Twine(MF.getFunctionNumber())); +} + +MCSymbol *PPCFunctionInfo::getLocalEPSymbol() const { + const DataLayout &DL = MF.getDataLayout(); + return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + "func_lep" + + Twine(MF.getFunctionNumber())); +} + +MCSymbol *PPCFunctionInfo::getTOCOffsetSymbol() const { + const DataLayout &DL = MF.getDataLayout(); + return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + "func_toc" + + Twine(MF.getFunctionNumber())); +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 607cdf6..10a8ce0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -197,6 +197,10 @@ public: bool usesPICBase() const { return UsesPICBase; } MCSymbol *getPICOffsetSymbol() const; + + MCSymbol *getGlobalEPSymbol() const; + MCSymbol *getLocalEPSymbol() const; + MCSymbol *getTOCOffsetSymbol() const; }; } // end of namespace llvm diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index 2dc0d82..a9d2e88 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -99,6 +99,11 @@ protected: break; } + // Don't really need to save data to the stack - the clobbered + // registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr) + // gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR). 
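+ // ADJCALLSTACKDOWN/ADJCALLSTACKUP bracket the expanded call sequence so that + // frame lowering can still recognize it as a call site; the zero immediates + // mean no additional stack space is reserved for it.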
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0); + // Expand into two ops built prior to the existing instruction. MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3) .addReg(InReg); @@ -113,6 +118,8 @@ protected: .addReg(GPR3)); Call->addOperand(MI->getOperand(3)); + BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0); + BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg) .addReg(GPR3); diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp index 733027a..05006ac 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp @@ -83,7 +83,6 @@ static bool IsIntegerCC(unsigned CC) return (CC <= SPCC::ICC_VC); } - static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC) { switch(CC) { @@ -124,106 +123,103 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC) llvm_unreachable("Invalid cond code"); } -bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const -{ - - MachineBasicBlock::iterator I = MBB.end(); - MachineBasicBlock::iterator UnCondBrIter = MBB.end(); - while (I != MBB.begin()) { - --I; +static bool isUncondBranchOpcode(int Opc) { return Opc == SP::BA; } - if (I->isDebugValue()) - continue; +static bool isCondBranchOpcode(int Opc) { + return Opc == SP::FBCOND || Opc == SP::BCOND; +} - // When we see a non-terminator, we are done. - if (!isUnpredicatedTerminator(I)) - break; +static bool isIndirectBranchOpcode(int Opc) { + return Opc == SP::BINDrr || Opc == SP::BINDri; +} - // Terminator is not a branch. - if (!I->isBranch()) - return true; +static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, + SmallVectorImpl<MachineOperand> &Cond) { + Cond.push_back(MachineOperand::CreateImm(LastInst->getOperand(1).getImm())); + Target = LastInst->getOperand(0).getMBB(); +} - // Handle Unconditional branches. - if (I->getOpcode() == SP::BA) { - UnCondBrIter = I; +bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return false; + + if (!isUnpredicatedTerminator(I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + unsigned LastOpc = LastInst->getOpcode(); + + // If there is only one terminator instruction, process it. + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (isUncondBranchOpcode(LastOpc)) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (isCondBranchOpcode(LastOpc)) { + // Block ends with fall-through condbranch. + parseCondBranch(LastInst, TBB, Cond); + return false; + } + return true; // Can't handle indirect branch. + } - if (!AllowModify) { - TBB = I->getOperand(0).getMBB(); - continue; + // Get the instruction before it if it is a terminator. + MachineInstr *SecondLastInst = I; + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + + // If AllowModify is true and the block ends with two or more unconditional + // branches, delete all but the first unconditional branch. 
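+ // For example, "ba L1; ba L2" becomes just "ba L1", since the second branch + // can never be executed.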
+ if (AllowModify && isUncondBranchOpcode(LastOpc)) { + while (isUncondBranchOpcode(SecondLastOpc)) { + LastInst->eraseFromParent(); + LastInst = SecondLastInst; + LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + // Return now; the only terminator is an unconditional branch. + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else { + SecondLastInst = I; + SecondLastOpc = SecondLastInst->getOpcode(); } + } + } - while (std::next(I) != MBB.end()) - std::next(I)->eraseFromParent(); - - Cond.clear(); - FBB = nullptr; + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) + return true; - if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - TBB = nullptr; - I->eraseFromParent(); - I = MBB.end(); - UnCondBrIter = MBB.end(); - continue; - } + // If the block ends with a B and a Bcc, handle it. + if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + parseCondBranch(SecondLastInst, TBB, Cond); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } - TBB = I->getOperand(0).getMBB(); - continue; - } + // If the block ends with two unconditional branches, handle it. The second + // one is not executed. + if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + return false; + } - unsigned Opcode = I->getOpcode(); - if (Opcode != SP::BCOND && Opcode != SP::FBCOND) - return true; // Unknown Opcode. - - SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm(); - - if (Cond.empty()) { - MachineBasicBlock *TargetBB = I->getOperand(0).getMBB(); - if (AllowModify && UnCondBrIter != MBB.end() && - MBB.isLayoutSuccessor(TargetBB)) { - - // Transform the code - // - // brCC L1 - // ba L2 - // L1: - // .. - // L2: - // - // into - // - // brnCC L2 - // L1: - // ... - // L2: - // - BranchCode = GetOppositeBranchCondition(BranchCode); - MachineBasicBlock::iterator OldInst = I; - BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(Opcode)) - .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode); - BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA)) - .addMBB(TargetBB); - - OldInst->eraseFromParent(); - UnCondBrIter->eraseFromParent(); - - UnCondBrIter = MBB.end(); - I = MBB.end(); - continue; - } - FBB = TBB; - TBB = I->getOperand(0).getMBB(); - Cond.push_back(MachineOperand::CreateImm(BranchCode)); - continue; - } - // FIXME: Handle subsequent conditional branches. - // For now, we can't handle multiple conditional branches. + // ...likewise if it ends with an indirect branch followed by an unconditional + // branch. + if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + I = LastInst; + if (AllowModify) + I->eraseFromParent(); return true; } - return false; + + // Otherwise, can't handle this.
+ return true; } unsigned @@ -277,6 +273,14 @@ unsigned SparcInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const return Count; } +bool SparcInstrInfo::ReverseBranchCondition( + SmallVectorImpl<MachineOperand> &Cond) const { + assert(Cond.size() == 1); + SPCC::CondCodes CC = static_cast<SPCC::CondCodes>(Cond[0].getImm()); + Cond[0].setImm(GetOppositeBranchCondition(CC)); + return false; +} + void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h index 15673f1..9de624c 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h +++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h @@ -76,6 +76,9 @@ public: MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, DebugLoc DL) const override; + bool + ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index b9f2eb5..d5dabc2 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1219,6 +1219,9 @@ def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; // Atomic operations //===----------------------------------------------------------------------===// +// A serialization instruction that acts as a barrier for all memory +// accesses, which expands to "bcr 14, 0". +let hasSideEffects = 1 in def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>; let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in { diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/CMakeLists.txt b/contrib/llvm/lib/Target/WebAssembly/Disassembler/CMakeLists.txt new file mode 100644 index 0000000..5e55e29 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMWebAssemblyDisassembler + WebAssemblyDisassembler.cpp + ) diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt b/contrib/llvm/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt new file mode 100644 index 0000000..a452ca1 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===-- ./lib/Target/WebAssembly/Disassembler/LLVMBuild.txt -----*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = WebAssemblyDisassembler +parent = WebAssembly +required_libraries = MCDisassembler WebAssemblyInfo Support +add_to_library_groups = WebAssembly diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/Makefile b/contrib/llvm/lib/Target/WebAssembly/Disassembler/Makefile new file mode 100644 index 0000000..bcd36ba --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===-- lib/Target/WebAssembly/Disassembler/Makefile -------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMWebAssemblyDisassembler + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp new file mode 100644 index 0000000..0143b10 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -0,0 +1,148 @@ +//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file is part of the WebAssembly Disassembler. +/// +/// It contains code to translate the data produced by the decoder into +/// MCInsts. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-disassembler" + +namespace { +class WebAssemblyDisassembler final : public MCDisassembler { + std::unique_ptr<const MCInstrInfo> MCII; + + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; + +public: + WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, + std::unique_ptr<const MCInstrInfo> MCII) + : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} +}; +} // end anonymous namespace + +static MCDisassembler *createWebAssemblyDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); + return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); +} + +extern "C" void LLVMInitializeWebAssemblyDisassembler() { + // Register the disassembler for each target. 
+ TargetRegistry::RegisterMCDisassembler(TheWebAssemblyTarget32, + createWebAssemblyDisassembler); + TargetRegistry::RegisterMCDisassembler(TheWebAssemblyTarget64, + createWebAssemblyDisassembler); +} + +MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( + MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, + raw_ostream &OS, raw_ostream &CS) const { + Size = 0; + uint64_t Pos = 0; + + // Read the opcode. + if (Pos + sizeof(uint64_t) > Bytes.size()) + return MCDisassembler::Fail; + uint64_t Opcode = support::endian::read64le(Bytes.data() + Pos); + Pos += sizeof(uint64_t); + + if (Opcode >= WebAssembly::INSTRUCTION_LIST_END) + return MCDisassembler::Fail; + + MI.setOpcode(Opcode); + const MCInstrDesc &Desc = MCII->get(Opcode); + unsigned NumFixedOperands = Desc.NumOperands; + + // If it's variadic, read the number of extra operands. + unsigned NumExtraOperands = 0; + if (Desc.isVariadic()) { + if (Pos + sizeof(uint64_t) > Bytes.size()) + return MCDisassembler::Fail; + NumExtraOperands = support::endian::read64le(Bytes.data() + Pos); + Pos += sizeof(uint64_t); + } + + // Read the fixed operands. These are described by the MCInstrDesc. + for (unsigned i = 0; i < NumFixedOperands; ++i) { + const MCOperandInfo &Info = Desc.OpInfo[i]; + switch (Info.OperandType) { + case MCOI::OPERAND_IMMEDIATE: + case WebAssembly::OPERAND_BASIC_BLOCK: { + if (Pos + sizeof(uint64_t) > Bytes.size()) + return MCDisassembler::Fail; + uint64_t Imm = support::endian::read64le(Bytes.data() + Pos); + Pos += sizeof(uint64_t); + MI.addOperand(MCOperand::createImm(Imm)); + break; + } + case MCOI::OPERAND_REGISTER: { + if (Pos + sizeof(uint64_t) > Bytes.size()) + return MCDisassembler::Fail; + uint64_t Reg = support::endian::read64le(Bytes.data() + Pos); + Pos += sizeof(uint64_t); + MI.addOperand(MCOperand::createReg(Reg)); + break; + } + case WebAssembly::OPERAND_FPIMM: { + // TODO: MC converts all floating point immediate operands to double. + // This is fine for numeric values, but may cause NaNs to change bits. + if (Pos + sizeof(uint64_t) > Bytes.size()) + return MCDisassembler::Fail; + uint64_t Bits = support::endian::read64le(Bytes.data() + Pos); + Pos += sizeof(uint64_t); + double Imm; + memcpy(&Imm, &Bits, sizeof(Imm)); + MI.addOperand(MCOperand::createFPImm(Imm)); + break; + } + default: + llvm_unreachable("unimplemented operand kind"); + } + } + + // Read the extra operands. + assert(NumExtraOperands == 0 || Desc.isVariadic()); + for (unsigned i = 0; i < NumExtraOperands; ++i) { + if (Pos + sizeof(uint64_t) > Bytes.size()) + return MCDisassembler::Fail; + if (Desc.TSFlags & WebAssemblyII::VariableOpIsImmediate) { + // Decode extra immediate operands. + uint64_t Imm = support::endian::read64le(Bytes.data() + Pos); + MI.addOperand(MCOperand::createImm(Imm)); + } else { + // Decode extra register operands. 
+ uint64_t Reg = support::endian::read64le(Bytes.data() + Pos); + MI.addOperand(MCOperand::createReg(Reg)); + } + Pos += sizeof(uint64_t); + } + + Size = Pos; + return MCDisassembler::Success; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp index 7ce3a00..9a95150 100644 --- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -16,6 +16,8 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -33,7 +35,7 @@ using namespace llvm; WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MII, MRI) {} + : MCInstPrinter(MAI, MII, MRI), ControlFlowCounter(0) {} void WebAssemblyInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { @@ -59,6 +61,52 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, // Print any added annotation. printAnnotation(OS, Annot); + + if (CommentStream) { + // Observe any effects on the control flow stack, for use in annotating + // control flow label references. + switch (MI->getOpcode()) { + default: + break; + case WebAssembly::LOOP: { + // Grab the TopLabel value first so that labels print in numeric order. + uint64_t TopLabel = ControlFlowCounter++; + ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false)); + printAnnotation(OS, "label" + utostr(TopLabel) + ':'); + ControlFlowStack.push_back(std::make_pair(TopLabel, true)); + break; + } + case WebAssembly::BLOCK: + ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false)); + break; + case WebAssembly::END_LOOP: + ControlFlowStack.pop_back(); + printAnnotation( + OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':'); + break; + case WebAssembly::END_BLOCK: + printAnnotation( + OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':'); + break; + } + + // Annotate any control flow label references. + unsigned NumFixedOperands = Desc.NumOperands; + SmallSet<uint64_t, 8> Printed; + for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { + const MCOperandInfo &Info = Desc.OpInfo[i]; + if (!(i < NumFixedOperands + ? (Info.OperandType == WebAssembly::OPERAND_BASIC_BLOCK) + : (Desc.TSFlags & WebAssemblyII::VariableOpImmediateIsLabel))) + continue; + uint64_t Depth = MI->getOperand(i).getImm(); + if (!Printed.insert(Depth).second) + continue; + const auto &Pair = ControlFlowStack.rbegin()[Depth]; + printAnnotation(OS, utostr(Depth) + ": " + (Pair.second ? 
"up" : "down") + + " to label" + utostr(Pair.first)); + } + } } static std::string toString(const APFloat &FP) { @@ -82,6 +130,9 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { + assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() || + MII.get(MI->getOpcode()).TSFlags == 0) && + "WebAssembly variable_ops register ops don't use TSFlags"); unsigned WAReg = Op.getReg(); if (int(WAReg) >= 0) printRegName(O, WAReg); @@ -95,19 +146,27 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (OpNo < MII.get(MI->getOpcode()).getNumDefs()) O << '='; } else if (Op.isImm()) { - switch (MI->getOpcode()) { - case WebAssembly::PARAM: - case WebAssembly::RESULT: - case WebAssembly::LOCAL: - O << WebAssembly::TypeToString(MVT::SimpleValueType(Op.getImm())); - break; - default: - O << Op.getImm(); - break; - } - } else if (Op.isFPImm()) + assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() || + (MII.get(MI->getOpcode()).TSFlags & + WebAssemblyII::VariableOpIsImmediate)) && + "WebAssemblyII::VariableOpIsImmediate should be set for " + "variable_ops immediate ops"); + // TODO: (MII.get(MI->getOpcode()).TSFlags & + // WebAssemblyII::VariableOpImmediateIsLabel) + // can tell us whether this is an immediate referencing a label in the + // control flow stack, and it may be nice to pretty-print. + O << Op.getImm(); + } else if (Op.isFPImm()) { + assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() || + MII.get(MI->getOpcode()).TSFlags == 0) && + "WebAssembly variable_ops floating point ops don't use TSFlags"); O << toString(APFloat(Op.getFPImm())); - else { + } else { + assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() || + (MII.get(MI->getOpcode()).TSFlags & + WebAssemblyII::VariableOpIsImmediate)) && + "WebAssemblyII::VariableOpIsImmediate should be set for " + "variable_ops expr ops"); assert(Op.isExpr() && "unknown operand kind in printOperand"); Op.getExpr()->print(O, &MAI); } diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h index 39a16f5..cd6c59a 100644 --- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -23,6 +23,9 @@ namespace llvm { class MCSubtargetInfo; class WebAssemblyInstPrinter final : public MCInstPrinter { + uint64_t ControlFlowCounter; + SmallVector<std::pair<uint64_t, bool>, 0> ControlFlowStack; + public: WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI); diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index b158ccb..bba06f6 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -95,9 +95,6 @@ WebAssemblyAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { } } // end anonymous namespace -MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - const Triple &TT, - StringRef CPU) { +MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Triple &TT) { return new WebAssemblyAsmBackend(TT.isArch64Bit()); } diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp 
b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp index c47a3d9..2bb58b3 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp @@ -30,19 +30,31 @@ protected: }; } // end anonymous namespace -// FIXME: Use EM_NONE as a temporary hack. Should we decide to pursue ELF -// writing seriously, we should email generic-abi@googlegroups.com and ask -// for our own ELF code. WebAssemblyELFObjectWriter::WebAssemblyELFObjectWriter(bool Is64Bit, uint8_t OSABI) - : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_NONE, - /*HasRelocationAddend=*/true) {} + : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_WEBASSEMBLY, + /*HasRelocationAddend=*/false) {} unsigned WebAssemblyELFObjectWriter::GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - // FIXME: Do we need our own relocs? - return Fixup.getKind(); + // WebAssembly functions are not allocated in the address space. To resolve a + // pointer to a function, we must use a special relocation type. + if (const MCSymbolRefExpr *SyExp = + dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) + if (SyExp->getKind() == MCSymbolRefExpr::VK_WebAssembly_FUNCTION) + return ELF::R_WEBASSEMBLY_FUNCTION; + + switch (Fixup.getKind()) { + case FK_Data_4: + assert(!is64Bit() && "4-byte relocations only supported on wasm32"); + return ELF::R_WEBASSEMBLY_DATA; + case FK_Data_8: + assert(is64Bit() && "8-byte relocations only supported on wasm64"); + return ELF::R_WEBASSEMBLY_DATA; + default: + llvm_unreachable("unimplemented fixup kind"); + } } MCObjectWriter *llvm::createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp index d261779..02c717a 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp @@ -27,11 +27,12 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) { // TODO: What should MaxInstLength be? - PrivateGlobalPrefix = ""; - PrivateLabelPrefix = ""; - UseDataRegionDirectives = true; + // Use .skip instead of .zero because .zero is confusing when used with two + // arguments (it doesn't actually zero things out). 
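+ // (".skip size, fill" spells out the fill byte explicitly, avoiding that + // surprise.)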
+ ZeroDirective = "\t.skip\t"; + Data8bitsDirective = "\t.int8\t"; Data16bitsDirective = "\t.int16\t"; Data32bitsDirective = "\t.int32\t"; diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index 7c6c79e..f409bd7 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCFixup.h" @@ -26,75 +27,66 @@ using namespace llvm; #define DEBUG_TYPE "mccodeemitter" +STATISTIC(MCNumEmitted, "Number of MC instructions emitted."); +STATISTIC(MCNumFixups, "Number of MC fixups created."); + namespace { class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { - const MCRegisterInfo &MRI; - -public: - WebAssemblyMCCodeEmitter(const MCInstrInfo &, const MCRegisterInfo &mri, - MCContext &) - : MRI(mri) {} + const MCInstrInfo &MCII; + const MCContext &Ctx; - ~WebAssemblyMCCodeEmitter() override {} - - /// TableGen'erated function for getting the binary encoding for an - /// instruction. + // Implementation generated by tablegen. uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - /// Return binary encoding of operand. If the machine operand requires - /// relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - uint64_t getMemoryOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; + +public: + WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) + : MCII(mcii), Ctx(ctx) {} }; } // end anonymous namespace MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { - return new WebAssemblyMCCodeEmitter(MCII, MRI, Ctx); -} - -unsigned WebAssemblyMCCodeEmitter::getMachineOpValue( - const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - if (MO.isReg()) - return MRI.getEncodingValue(MO.getReg()); - if (MO.isImm()) - return static_cast<unsigned>(MO.getImm()); - - assert(MO.isExpr()); - - assert(MO.getExpr()->getKind() == MCExpr::SymbolRef); - - assert(false && "FIXME: not implemented yet"); - - return 0; + return new WebAssemblyMCCodeEmitter(MCII, Ctx); } void WebAssemblyMCCodeEmitter::encodeInstruction( const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - assert(false && "FIXME: not implemented yet"); -} - -// Encode WebAssembly Memory Operand -uint64_t -WebAssemblyMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - assert(false && "FIXME: not implemented yet"); - return 0; + // FIXME: This is not the real binary encoding. This is an extremely + // over-simplified encoding where we just use uint64_t for everything. This + // is a temporary measure. 
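+ // Interim layout: the opcode as a little-endian uint64_t; for variadic + // instructions, a uint64_t count of extra operands; then each operand as a + // little-endian uint64_t (or as a little-endian double for FP immediates).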
+ support::endian::Writer<support::little>(OS).write<uint64_t>(MI.getOpcode()); + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + if (Desc.isVariadic()) + support::endian::Writer<support::little>(OS).write<uint64_t>( + MI.getNumOperands() - Desc.NumOperands); + for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (MO.isReg()) { + support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getReg()); + } else if (MO.isImm()) { + support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getImm()); + } else if (MO.isFPImm()) { + support::endian::Writer<support::little>(OS).write<double>(MO.getFPImm()); + } else if (MO.isExpr()) { + support::endian::Writer<support::little>(OS).write<uint64_t>(0); + Fixups.push_back(MCFixup::create( + (1 + MCII.get(MI.getOpcode()).isVariadic() + i) * sizeof(uint64_t), + MO.getExpr(), STI.getTargetTriple().isArch64Bit() ? FK_Data_8 : FK_Data_4, + MI.getLoc())); + ++MCNumFixups; + } else { + llvm_unreachable("unexpected operand kind"); + } + } + + ++MCNumEmitted; // Keep track of the # of mi's emitted. } #include "WebAssemblyGenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index 14cd295..37000f1 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -15,10 +15,10 @@ #include "WebAssemblyMCTargetDesc.h" #include "InstPrinter/WebAssemblyInstPrinter.h" #include "WebAssemblyMCAsmInfo.h" +#include "WebAssemblyTargetStreamer.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -35,52 +35,89 @@ using namespace llvm; #define GET_REGINFO_MC_DESC #include "WebAssemblyGenRegisterInfo.inc" -static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo & /*MRI*/, - const Triple &TT) { +static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/, + const Triple &TT) { return new WebAssemblyMCAsmInfo(TT); } -static MCInstrInfo *createWebAssemblyMCInstrInfo() { +static MCInstrInfo *createMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitWebAssemblyMCInstrInfo(X); return X; } -static MCStreamer *createWebAssemblyMCStreamer(const Triple &T, MCContext &Ctx, - MCAsmBackend &MAB, - raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll) { - return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); +static MCRegisterInfo *createMCRegisterInfo(const Triple & /*T*/) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitWebAssemblyMCRegisterInfo(X, 0); + return X; } -static MCInstPrinter * -createWebAssemblyMCInstPrinter(const Triple & /*T*/, unsigned SyntaxVariant, - const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI) { +static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { assert(SyntaxVariant == 0); return new WebAssemblyInstPrinter(MAI, MII, MRI); } +static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo & /*MRI*/, + MCContext &Ctx) { + return createWebAssemblyMCCodeEmitter(MCII, Ctx); +} + +static MCAsmBackend *createAsmBackend(const Target & /*T*/, + const 
MCRegisterInfo & /*MRI*/, + const Triple &TT, StringRef /*CPU*/) { + return createWebAssemblyAsmBackend(TT); +} + +static MCSubtargetInfo *createMCSubtargetInfo(const Triple &TT, StringRef CPU, + StringRef FS) { + return createWebAssemblyMCSubtargetInfoImpl(TT, CPU, FS); +} + +static MCTargetStreamer * +createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo & /*STI*/) { + return new WebAssemblyTargetELFStreamer(S); +} + +static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter * /*InstPrint*/, + bool /*isVerboseAsm*/) { + return new WebAssemblyTargetAsmStreamer(S, OS); +} + // Force static initialization. extern "C" void LLVMInitializeWebAssemblyTargetMC() { for (Target *T : {&TheWebAssemblyTarget32, &TheWebAssemblyTarget64}) { // Register the MC asm info. - RegisterMCAsmInfoFn X(*T, createWebAssemblyMCAsmInfo); + RegisterMCAsmInfoFn X(*T, createMCAsmInfo); // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(*T, createWebAssemblyMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(*T, createMCInstrInfo); - // Register the object streamer - TargetRegistry::RegisterELFStreamer(*T, createWebAssemblyMCStreamer); + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createMCRegisterInfo); // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(*T, createWebAssemblyMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(*T, createMCInstPrinter); + + // Register the MC code emitter. + TargetRegistry::RegisterMCCodeEmitter(*T, createCodeEmitter); + + // Register the ASM Backend. + TargetRegistry::RegisterMCAsmBackend(*T, createAsmBackend); - // Register the MC code emitter - TargetRegistry::RegisterMCCodeEmitter(*T, createWebAssemblyMCCodeEmitter); + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createMCSubtargetInfo); - // Register the ASM Backend - TargetRegistry::RegisterMCAsmBackend(*T, createWebAssemblyAsmBackend); + // Register the object target streamer. + TargetRegistry::RegisterObjectTargetStreamer(*T, + createObjectTargetStreamer); + // Register the asm target streamer. 
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer); } } diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index e78f73e..9bac4f8 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -15,40 +15,62 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/DataTypes.h" namespace llvm { -class formatted_raw_ostream; class MCAsmBackend; class MCCodeEmitter; class MCContext; class MCInstrInfo; -class MCRegisterInfo; class MCObjectWriter; -class MCStreamer; class MCSubtargetInfo; -class MCTargetStreamer; -class StringRef; class Target; class Triple; -class raw_ostream; class raw_pwrite_stream; extern Target TheWebAssemblyTarget32; extern Target TheWebAssemblyTarget64; MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); -MCAsmBackend *createWebAssemblyAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU); +MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT); MCObjectWriter *createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, uint8_t OSABI); +namespace WebAssembly { +enum OperandType { + /// Basic block label in a branch construct. + OPERAND_BASIC_BLOCK = MCOI::OPERAND_FIRST_TARGET, + /// Floating-point immediate. + OPERAND_FPIMM +}; + +/// WebAssembly-specific directive identifiers. +enum Directive { + // FIXME: This is not the real binary encoding. + DotParam = UINT64_MAX - 0, ///< .param + DotResult = UINT64_MAX - 1, ///< .result + DotLocal = UINT64_MAX - 2, ///< .local + DotEndFunc = UINT64_MAX - 3, ///< .endfunc +}; + +} // end namespace WebAssembly + +namespace WebAssemblyII { +enum { + // For variadic instructions, this flag indicates whether an operand + // in the variable_ops range is an immediate value. + VariableOpIsImmediate = (1 << 0), + // For immediate values in the variable_ops range, this flag indicates + // whether the value represents a control-flow label. + VariableOpImmediateIsLabel = (1 << 1), +}; +} // end namespace WebAssemblyII + } // end namespace llvm // Defines symbolic names for WebAssembly registers. This defines a mapping from diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp new file mode 100644 index 0000000..1d28228 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -0,0 +1,94 @@ +//==-- WebAssemblyTargetStreamer.cpp - WebAssembly Target Streamer Methods --=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file defines WebAssembly-specific target streamer classes. +/// These are for implementing support for target-specific assembly directives. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyTargetStreamer.h" +#include "InstPrinter/WebAssemblyInstPrinter.h" +#include "WebAssemblyMCTargetDesc.h" +#include "WebAssemblyTargetObjectFile.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +using namespace llvm; + +WebAssemblyTargetStreamer::WebAssemblyTargetStreamer(MCStreamer &S) + : MCTargetStreamer(S) {} + +WebAssemblyTargetAsmStreamer::WebAssemblyTargetAsmStreamer( + MCStreamer &S, formatted_raw_ostream &OS) + : WebAssemblyTargetStreamer(S), OS(OS) {} + +WebAssemblyTargetELFStreamer::WebAssemblyTargetELFStreamer(MCStreamer &S) + : WebAssemblyTargetStreamer(S) {} + +static void PrintTypes(formatted_raw_ostream &OS, ArrayRef<MVT> Types) { + bool First = true; + for (MVT Type : Types) { + if (First) + First = false; + else + OS << ", "; + OS << WebAssembly::TypeToString(Type); + } + OS << '\n'; +} + +void WebAssemblyTargetAsmStreamer::emitParam(ArrayRef<MVT> Types) { + OS << "\t.param \t"; + PrintTypes(OS, Types); +} + +void WebAssemblyTargetAsmStreamer::emitResult(ArrayRef<MVT> Types) { + OS << "\t.result \t"; + PrintTypes(OS, Types); +} + +void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<MVT> Types) { + OS << "\t.local \t"; + PrintTypes(OS, Types); +} + +void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; } + +// FIXME: What follows is not the real binary encoding. + +static void EncodeTypes(MCStreamer &Streamer, ArrayRef<MVT> Types) { + Streamer.EmitIntValue(Types.size(), sizeof(uint64_t)); + for (MVT Type : Types) + Streamer.EmitIntValue(Type.SimpleTy, sizeof(uint64_t)); +} + +void WebAssemblyTargetELFStreamer::emitParam(ArrayRef<MVT> Types) { + Streamer.EmitIntValue(WebAssembly::DotParam, sizeof(uint64_t)); + EncodeTypes(Streamer, Types); +} + +void WebAssemblyTargetELFStreamer::emitResult(ArrayRef<MVT> Types) { + Streamer.EmitIntValue(WebAssembly::DotResult, sizeof(uint64_t)); + EncodeTypes(Streamer, Types); +} + +void WebAssemblyTargetELFStreamer::emitLocal(ArrayRef<MVT> Types) { + Streamer.EmitIntValue(WebAssembly::DotLocal, sizeof(uint64_t)); + EncodeTypes(Streamer, Types); +} + +void WebAssemblyTargetELFStreamer::emitEndFunc() { + Streamer.EmitIntValue(WebAssembly::DotEndFunc, sizeof(uint64_t)); +} diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h new file mode 100644 index 0000000..c66a515 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -0,0 +1,68 @@ +//==-- WebAssemblyTargetStreamer.h - WebAssembly Target Streamer -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares WebAssembly-specific target streamer classes. +/// These are for implementing support for target-specific assembly directives. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H + +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/MC/MCStreamer.h" + +namespace llvm { + +class MCELFStreamer; + +/// WebAssembly-specific streamer interface, to implement support for +/// WebAssembly-specific assembly directives. +class WebAssemblyTargetStreamer : public MCTargetStreamer { +public: + explicit WebAssemblyTargetStreamer(MCStreamer &S); + + /// .param + virtual void emitParam(ArrayRef<MVT> Types) = 0; + /// .result + virtual void emitResult(ArrayRef<MVT> Types) = 0; + /// .local + virtual void emitLocal(ArrayRef<MVT> Types) = 0; + /// .endfunc + virtual void emitEndFunc() = 0; +}; + +/// This part is for ascii assembly output +class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer { + formatted_raw_ostream &OS; + +public: + WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); + + void emitParam(ArrayRef<MVT> Types) override; + void emitResult(ArrayRef<MVT> Types) override; + void emitLocal(ArrayRef<MVT> Types) override; + void emitEndFunc() override; +}; + +/// This part is for ELF object output +class WebAssemblyTargetELFStreamer final : public WebAssemblyTargetStreamer { +public: + explicit WebAssemblyTargetELFStreamer(MCStreamer &S); + + void emitParam(ArrayRef<MVT> Types) override; + void emitResult(ArrayRef<MVT> Types) override; + void emitLocal(ArrayRef<MVT> Types) override; + void emitEndFunc() override; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 0d2b4d9..45ac99d 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -17,6 +17,7 @@ #include "WebAssembly.h" #include "InstPrinter/WebAssemblyInstPrinter.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "MCTargetDesc/WebAssemblyTargetStreamer.h" #include "WebAssemblyMCInstLower.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblyRegisterInfo.h" @@ -69,7 +70,9 @@ private: void EmitJumpTableInfo() override; void EmitConstantPool() override; void EmitFunctionBodyStart() override; + void EmitFunctionBodyEnd() override; void EmitInstruction(const MachineInstr *MI) override; + const MCExpr *lowerConstant(const Constant *CV) override; bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS) override; @@ -80,6 +83,7 @@ private: MVT getRegType(unsigned RegNo) const; const char *toString(MVT VT) const; std::string regToString(const MachineOperand &MO); + WebAssemblyTargetStreamer *getTargetStreamer(); }; } // end anonymous namespace @@ -90,9 +94,9 @@ private: MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { const TargetRegisterClass *TRC = - TargetRegisterInfo::isVirtualRegister(RegNo) ? - MRI->getRegClass(RegNo) : - MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo); + TargetRegisterInfo::isVirtualRegister(RegNo) + ? 
MRI->getRegClass(RegNo) + : MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo); for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) if (TRC->hasType(T)) return T; @@ -101,6 +105,10 @@ MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { return MVT::Other; } +const char *WebAssemblyAsmPrinter::toString(MVT VT) const { + return WebAssembly::TypeToString(VT); +} + std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) { unsigned RegNo = MO.getReg(); assert(TargetRegisterInfo::isVirtualRegister(RegNo) && @@ -111,8 +119,10 @@ std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) { return '$' + utostr(WAReg); } -const char *WebAssemblyAsmPrinter::toString(MVT VT) const { - return WebAssembly::TypeToString(VT); +WebAssemblyTargetStreamer * +WebAssemblyAsmPrinter::getTargetStreamer() { + MCTargetStreamer *TS = OutStreamer->getTargetStreamer(); + return static_cast<WebAssemblyTargetStreamer *>(TS); } //===----------------------------------------------------------------------===// @@ -145,29 +155,20 @@ static void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, } void WebAssemblyAsmPrinter::EmitFunctionBodyStart() { - if (!MFI->getParams().empty()) { - MCInst Param; - Param.setOpcode(WebAssembly::PARAM); - for (MVT VT : MFI->getParams()) - Param.addOperand(MCOperand::createImm(VT.SimpleTy)); - EmitToStreamer(*OutStreamer, Param); - } + if (!MFI->getParams().empty()) + getTargetStreamer()->emitParam(MFI->getParams()); SmallVector<MVT, 4> ResultVTs; const Function &F(*MF->getFunction()); ComputeLegalValueVTs(F, TM, F.getReturnType(), ResultVTs); + // If the return type needs to be legalized it will get converted into // passing a pointer. - if (ResultVTs.size() == 1) { - MCInst Result; - Result.setOpcode(WebAssembly::RESULT); - Result.addOperand(MCOperand::createImm(ResultVTs.front().SimpleTy)); - EmitToStreamer(*OutStreamer, Result); - } + if (ResultVTs.size() == 1) + getTargetStreamer()->emitResult(ResultVTs); bool AnyWARegs = false; - MCInst Local; - Local.setOpcode(WebAssembly::LOCAL); + SmallVector<MVT, 16> LocalTypes; for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx); unsigned WAReg = MFI->getWAReg(VReg); @@ -180,22 +181,26 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() { // Don't declare stackified registers. if (int(WAReg) < 0) continue; - Local.addOperand(MCOperand::createImm(getRegType(VReg).SimpleTy)); + LocalTypes.push_back(getRegType(VReg)); AnyWARegs = true; } auto &PhysRegs = MFI->getPhysRegs(); for (unsigned PReg = 0; PReg < PhysRegs.size(); ++PReg) { if (PhysRegs[PReg] == -1U) continue; - Local.addOperand(MCOperand::createImm(getRegType(PReg).SimpleTy)); + LocalTypes.push_back(getRegType(PReg)); AnyWARegs = true; } if (AnyWARegs) - EmitToStreamer(*OutStreamer, Local); + getTargetStreamer()->emitLocal(LocalTypes); AsmPrinter::EmitFunctionBodyStart(); } +void WebAssemblyAsmPrinter::EmitFunctionBodyEnd() { + getTargetStreamer()->emitEndFunc(); +} + void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n'); @@ -207,10 +212,6 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { // These represent values which are live into the function entry, so there's // no instruction to emit. 
break; - case WebAssembly::LOOP_END: - // This is a no-op which just exists to tell AsmPrinter.cpp that there's a - // fallthrough which nevertheless requires a label for the destination here. - break; default: { WebAssemblyMCInstLower MCInstLowering(OutContext, *this); MCInst TmpInst; @@ -221,6 +222,14 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { } } +const MCExpr *WebAssemblyAsmPrinter::lowerConstant(const Constant *CV) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) + if (GV->getValueType()->isFunctionTy()) + return MCSymbolRefExpr::create( + getSymbol(GV), MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext); + return AsmPrinter::lowerConstant(CV); +} + bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index e9671ee..a39349c 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -256,7 +257,8 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) { /// code) for a branch instruction to both branch to a block and fallthrough /// to it, so we check the actual branch operands to see if there are any /// explicit mentions. -static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, MachineBasicBlock *MBB) { +static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, + MachineBasicBlock *MBB) { for (MachineInstr &MI : Pred->terminators()) for (MachineOperand &MO : MI.explicit_operands()) if (MO.isMBB() && MO.getMBB() == MBB) @@ -325,13 +327,21 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, InsertPos = Header->getFirstTerminator(); while (InsertPos != Header->begin() && prev(InsertPos)->definesRegister(WebAssembly::EXPR_STACK) && - prev(InsertPos)->getOpcode() != WebAssembly::LOOP) + prev(InsertPos)->getOpcode() != WebAssembly::LOOP && + prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK && + prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP) --InsertPos; } // Add the BLOCK. - BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK)) - .addMBB(&MBB); + BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK)); + + // Mark the end of the block. + InsertPos = MBB.begin(); + while (InsertPos != MBB.end() && + InsertPos->getOpcode() == WebAssembly::END_LOOP) + ++InsertPos; + BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::END_BLOCK)); // Track the farthest-spanning scope that ends at this point. int Number = MBB.getNumber(); @@ -341,10 +351,11 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, } /// Insert a LOOP marker for a loop starting at MBB (if it's a loop header). 
-static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF,
-                            SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
-                            const WebAssemblyInstrInfo &TII,
-                            const MachineLoopInfo &MLI) {
+static void PlaceLoopMarker(
+    MachineBasicBlock &MBB, MachineFunction &MF,
+    SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
+    DenseMap<const MachineInstr *, const MachineBasicBlock *> &LoopTops,
+    const WebAssemblyInstrInfo &TII, const MachineLoopInfo &MLI) {
   MachineLoop *Loop = MLI.getLoopFor(&MBB);
   if (!Loop || Loop->getHeader() != &MBB)
     return;
@@ -361,14 +372,19 @@ static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF,
     Iter = next(MachineFunction::iterator(Bottom));
   }
   MachineBasicBlock *AfterLoop = &*Iter;
-  BuildMI(MBB, MBB.begin(), DebugLoc(), TII.get(WebAssembly::LOOP))
-      .addMBB(AfterLoop);
 
-  // Emit a special no-op telling the asm printer that we need a label to close
-  // the loop scope, even though the destination is only reachable by
-  // fallthrough.
-  if (!Bottom->back().isBarrier())
-    BuildMI(*Bottom, Bottom->end(), DebugLoc(), TII.get(WebAssembly::LOOP_END));
+  // Mark the beginning of the loop (after the end of any existing loop that
+  // ends here).
+  auto InsertPos = MBB.begin();
+  while (InsertPos != MBB.end() &&
+         InsertPos->getOpcode() == WebAssembly::END_LOOP)
+    ++InsertPos;
+  BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::LOOP));
+
+  // Mark the end of the loop.
+  MachineInstr *End = BuildMI(*AfterLoop, AfterLoop->begin(), DebugLoc(),
+                              TII.get(WebAssembly::END_LOOP));
+  LoopTops[End] = &MBB;
 
   assert((!ScopeTops[AfterLoop->getNumber()] ||
           ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) &&
@@ -377,6 +393,19 @@ static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF,
   ScopeTops[AfterLoop->getNumber()] = &MBB;
 }
 
+static unsigned
+GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
+         const MachineBasicBlock *MBB) {
+  unsigned Depth = 0;
+  for (auto X : reverse(Stack)) {
+    if (X == MBB)
+      break;
+    ++Depth;
+  }
+  assert(Depth < Stack.size() && "Branch destination should be in scope");
+  return Depth;
+}
+
 /// Insert LOOP and BLOCK markers at appropriate places.
 static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
                          const WebAssemblyInstrInfo &TII,
@@ -388,25 +417,57 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
   // we may insert at the end.
   SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1);
 
+  // For each END_LOOP, the corresponding LOOP.
+  DenseMap<const MachineInstr *, const MachineBasicBlock *> LoopTops;
+
   for (auto &MBB : MF) {
     // Place the LOOP for MBB if MBB is the header of a loop.
-    PlaceLoopMarker(MBB, MF, ScopeTops, TII, MLI);
+    PlaceLoopMarker(MBB, MF, ScopeTops, LoopTops, TII, MLI);
 
     // Place the BLOCK for MBB if MBB is branched to from above.
     PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT);
   }
-}
 
-#ifndef NDEBUG
-static bool
-IsOnStack(const SmallVectorImpl<std::pair<MachineBasicBlock *, bool>> &Stack,
-          const MachineBasicBlock *MBB) {
-  for (const auto &Pair : Stack)
-    if (Pair.first == MBB)
-      return true;
-  return false;
+  // Now rewrite references to basic blocks to be depth immediates.
+ SmallVector<const MachineBasicBlock *, 8> Stack; + for (auto &MBB : reverse(MF)) { + for (auto &MI : reverse(MBB)) { + switch (MI.getOpcode()) { + case WebAssembly::BLOCK: + assert(ScopeTops[Stack.back()->getNumber()] == &MBB && + "Block should be balanced"); + Stack.pop_back(); + break; + case WebAssembly::LOOP: + assert(Stack.back() == &MBB && "Loop top should be balanced"); + Stack.pop_back(); + Stack.pop_back(); + break; + case WebAssembly::END_BLOCK: + Stack.push_back(&MBB); + break; + case WebAssembly::END_LOOP: + Stack.push_back(&MBB); + Stack.push_back(LoopTops[&MI]); + break; + default: + if (MI.isTerminator()) { + // Rewrite MBB operands to be depth immediates. + SmallVector<MachineOperand, 4> Ops(MI.operands()); + while (MI.getNumOperands() > 0) + MI.RemoveOperand(MI.getNumOperands() - 1); + for (auto MO : Ops) { + if (MO.isMBB()) + MO = MachineOperand::CreateImm(GetDepth(Stack, MO.getMBB())); + MI.addOperand(MF, MO); + } + } + break; + } + } + } + assert(Stack.empty() && "Control flow should be balanced"); } -#endif bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** CFG Stackifying **********\n" @@ -415,7 +476,9 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { const auto &MLI = getAnalysis<MachineLoopInfo>(); auto &MDT = getAnalysis<MachineDominatorTree>(); + // Liveness is not tracked for EXPR_STACK physreg. const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + MF.getRegInfo().invalidateLiveness(); // RPO sorting needs all loops to be single-entry. EliminateMultipleEntryLoops(MF, MLI); @@ -426,43 +489,5 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { // Place the BLOCK and LOOP markers to indicate the beginnings of scopes. PlaceMarkers(MF, MLI, TII, MDT); -#ifndef NDEBUG - // Verify that block and loop beginnings and endings are in LIFO order, and - // that all references to blocks are to blocks on the stack at the point of - // the reference. - SmallVector<std::pair<MachineBasicBlock *, bool>, 0> Stack; - for (auto &MBB : MF) { - while (!Stack.empty() && Stack.back().first == &MBB) - if (Stack.back().second) { - assert(Stack.size() >= 2); - Stack.pop_back(); - Stack.pop_back(); - } else { - assert(Stack.size() >= 1); - Stack.pop_back(); - } - for (auto &MI : MBB) - switch (MI.getOpcode()) { - case WebAssembly::LOOP: - Stack.push_back(std::make_pair(&MBB, false)); - Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), true)); - break; - case WebAssembly::BLOCK: - Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), false)); - break; - default: - // Verify that all referenced blocks are in scope. A reference to a - // block with a negative number is invalid, but can happen with inline - // asm, so we shouldn't assert on it, but instead let CodeGen properly - // fail on it. 
- for (const MachineOperand &MO : MI.explicit_operands()) - if (MO.isMBB() && MO.getMBB()->getNumber() >= 0) - assert(IsOnStack(Stack, MO.getMBB())); - break; - } - } - assert(Stack.empty()); -#endif - return true; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 7a89f78..e9933b0 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -573,7 +573,8 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, SDLoc DL(Op); const auto *GA = cast<GlobalAddressSDNode>(Op); EVT VT = Op.getValueType(); - assert(GA->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); + assert(GA->getTargetFlags() == 0 && + "Unexpected target flags on generic GlobalAddressSDNode"); if (GA->getAddressSpace() != 0) fail(DL, DAG, "WebAssembly only expects the 0 address space"); return DAG.getNode( @@ -587,9 +588,16 @@ WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op, SDLoc DL(Op); const auto *ES = cast<ExternalSymbolSDNode>(Op); EVT VT = Op.getValueType(); - assert(ES->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); + assert(ES->getTargetFlags() == 0 && + "Unexpected target flags on generic ExternalSymbolSDNode"); + // Set the TargetFlags to 0x1 which indicates that this is a "function" + // symbol rather than a data symbol. We do this unconditionally even though + // we don't know anything about the symbol other than its name, because all + // external symbols used in target-independent SelectionDAG code are for + // functions. return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, - DAG.getTargetExternalSymbol(ES->getSymbol(), VT)); + DAG.getTargetExternalSymbol(ES->getSymbol(), VT, + /*TargetFlags=*/0x1)); } SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op, diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 05efe89..fda9595 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -41,28 +41,33 @@ let Defs = [ARGUMENTS] in { // TODO: SelectionDAG's lowering insists on using a pointer as the index for // jump tables, so in practice we don't ever use TABLESWITCH_I64 in wasm32 mode // currently. +// Set TSFlags{0} to 1 to indicate that the variable_ops are immediates. +// Set TSFlags{1} to 1 to indicate that the immediates represent labels. let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { def TABLESWITCH_I32 : I<(outs), (ins I32:$index, bb_op:$default, variable_ops), [(WebAssemblytableswitch I32:$index, bb:$default)], - "tableswitch\t$index, $default">; + "tableswitch\t$index, $default"> { + let TSFlags{0} = 1; + let TSFlags{1} = 1; +} def TABLESWITCH_I64 : I<(outs), (ins I64:$index, bb_op:$default, variable_ops), [(WebAssemblytableswitch I64:$index, bb:$default)], - "tableswitch\t$index, $default">; + "tableswitch\t$index, $default"> { + let TSFlags{0} = 1; + let TSFlags{1} = 1; +} } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 -// Placemarkers to indicate the start of a block or loop scope. These +// Placemarkers to indicate the start or end of a block or loop scope. These // use/clobber EXPR_STACK to prevent them from being moved into the middle of // an expression tree. 
let Uses = [EXPR_STACK], Defs = [EXPR_STACK] in { -def BLOCK : I<(outs), (ins bb_op:$dst), [], "block \t$dst">; -def LOOP : I<(outs), (ins bb_op:$dst), [], "loop \t$dst">; +def BLOCK : I<(outs), (ins), [], "block">; +def LOOP : I<(outs), (ins), [], "loop">; +def END_BLOCK : I<(outs), (ins), [], "end_block">; +def END_LOOP : I<(outs), (ins), [], "end_loop">; } // Uses = [EXPR_STACK], Defs = [EXPR_STACK] -// No-op to indicate to the AsmPrinter that a loop ends here, so a -// basic block label is needed even if it wouldn't otherwise appear so. -let isTerminator = 1, hasCtrlDep = 1 in -def LOOP_END : I<(outs), (ins), []>; - multiclass RETURN<WebAssemblyRegClass vt> { def RETURN_#vt : I<(outs), (ins vt:$val), [(WebAssemblyreturn vt:$val)], "return \t$val">; diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 5e7663c..028e9af 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -74,6 +74,9 @@ bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, case WebAssembly::BR_IF: if (HaveCond) return true; + // If we're running after CFGStackify, we can't optimize further. + if (!MI.getOperand(1).isMBB()) + return true; Cond.push_back(MachineOperand::CreateImm(true)); Cond.push_back(MI.getOperand(0)); TBB = MI.getOperand(1).getMBB(); @@ -82,12 +85,18 @@ bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, case WebAssembly::BR_UNLESS: if (HaveCond) return true; + // If we're running after CFGStackify, we can't optimize further. + if (!MI.getOperand(1).isMBB()) + return true; Cond.push_back(MachineOperand::CreateImm(false)); Cond.push_back(MI.getOperand(0)); TBB = MI.getOperand(1).getMBB(); HaveCond = true; break; case WebAssembly::BR: + // If we're running after CFGStackify, we can't optimize further. + if (!MI.getOperand(0).isMBB()) + return true; if (!HaveCond) TBB = MI.getOperand(0).getMBB(); else diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index f0b4ce7..2e682a4 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -66,8 +66,18 @@ def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper", // WebAssembly-specific Operands. //===----------------------------------------------------------------------===// +let OperandNamespace = "WebAssembly" in { + +let OperandType = "OPERAND_BASIC_BLOCK" in def bb_op : Operand<OtherVT>; +let OperandType = "OPERAND_FPIMM" in { +def f32imm_op : Operand<f32>; +def f64imm_op : Operand<f64>; +} // OperandType = "OPERAND_FPIMM" + +} // OperandNamespace = "WebAssembly" + //===----------------------------------------------------------------------===// // WebAssembly Instruction Format Definitions. 
//===----------------------------------------------------------------------===// @@ -120,31 +130,20 @@ def CONST_I32 : I<(outs I32:$res), (ins i32imm:$imm), def CONST_I64 : I<(outs I64:$res), (ins i64imm:$imm), [(set I64:$res, imm:$imm)], "i64.const\t$res, $imm">; -def CONST_F32 : I<(outs F32:$res), (ins f32imm:$imm), +def CONST_F32 : I<(outs F32:$res), (ins f32imm_op:$imm), [(set F32:$res, fpimm:$imm)], "f32.const\t$res, $imm">; -def CONST_F64 : I<(outs F64:$res), (ins f64imm:$imm), +def CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm), [(set F64:$res, fpimm:$imm)], "f64.const\t$res, $imm">; } // isMoveImm = 1 } // Defs = [ARGUMENTS] -def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$dst)), - (CONST_I32 tglobaladdr:$dst)>; -def : Pat<(i32 (WebAssemblywrapper texternalsym:$dst)), - (CONST_I32 texternalsym:$dst)>; -def : Pat<(i32 (WebAssemblywrapper tjumptable:$dst)), - (CONST_I32 tjumptable:$dst)>; - -let Defs = [ARGUMENTS] in { - -// Function signature and local variable declaration "instructions". -def PARAM : I<(outs), (ins variable_ops), [], ".param \t">; -def RESULT : I<(outs), (ins variable_ops), [], ".result \t">; -def LOCAL : I<(outs), (ins variable_ops), [], ".local \t">; - -} // Defs = [ARGUMENTS] +def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)), + (CONST_I32 tglobaladdr:$addr)>; +def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)), + (CONST_I32 texternalsym:$addr)>; //===----------------------------------------------------------------------===// // Additional sets of instructions. diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 74ec45d..b39ac52 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -24,10 +24,25 @@ // WebAssembly constant offsets are performed as unsigned with infinite // precision, so we need to check for NoUnsignedWrap so that we don't fold an // offset for an add that needs wrapping. -def regPlusImm : PatFrag<(ops node:$off, node:$addr), +def regPlusImm : PatFrag<(ops node:$addr, node:$off), (add node:$addr, node:$off), [{ return N->getFlags()->hasNoUnsignedWrap(); }]>; +// GlobalAddresses are conceptually unsigned values, so we can also fold them +// into immediate values as long as their offsets are non-negative. +def regPlusGA : PatFrag<(ops node:$addr, node:$off), + (add node:$addr, node:$off), + [{ + return N->getFlags()->hasNoUnsignedWrap() || + (N->getOperand(1)->getOpcode() == WebAssemblyISD::Wrapper && + isa<GlobalAddressSDNode>(N->getOperand(1)->getOperand(0)) && + cast<GlobalAddressSDNode>(N->getOperand(1)->getOperand(0)) + ->getOffset() >= 0); +}]>; + +// We don't need a regPlusES because external symbols never have constant +// offsets folded into them, so we can just use add. + let Defs = [ARGUMENTS] in { // Basic load. @@ -49,29 +64,33 @@ def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>; def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>; // Select loads with a constant offset. 
-def : Pat<(i32 (load (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i32 (load (regPlusImm I32:$addr, imm:$off))), (LOAD_I32 imm:$off, $addr)>; -def : Pat<(i64 (load (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (load (regPlusImm I32:$addr, imm:$off))), (LOAD_I64 imm:$off, $addr)>; -def : Pat<(f32 (load (regPlusImm imm:$off, I32:$addr))), +def : Pat<(f32 (load (regPlusImm I32:$addr, imm:$off))), (LOAD_F32 imm:$off, $addr)>; -def : Pat<(f64 (load (regPlusImm imm:$off, I32:$addr))), +def : Pat<(f64 (load (regPlusImm I32:$addr, imm:$off))), (LOAD_F64 imm:$off, $addr)>; -def : Pat<(i32 (load (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i32 (load (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD_I32 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (load (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (load (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD_I64 tglobaladdr:$off, $addr)>; -def : Pat<(f32 (load (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(f32 (load (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD_F32 tglobaladdr:$off, $addr)>; -def : Pat<(f64 (load (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(f64 (load (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD_F64 tglobaladdr:$off, $addr)>; -def : Pat<(i32 (load (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), (LOAD_I32 texternalsym:$off, $addr)>; -def : Pat<(i64 (load (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), (LOAD_I64 texternalsym:$off, $addr)>; -def : Pat<(f32 (load (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(f32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), (LOAD_F32 texternalsym:$off, $addr)>; -def : Pat<(f64 (load (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(f64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), (LOAD_F64 texternalsym:$off, $addr)>; // Select loads with just a constant offset. @@ -135,65 +154,85 @@ def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>; def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; // Select extending loads with a constant offset. 
-def : Pat<(i32 (sextloadi8 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i32 (sextloadi8 (regPlusImm I32:$addr, imm:$off))), (LOAD8_S_I32 imm:$off, $addr)>; -def : Pat<(i32 (zextloadi8 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i32 (zextloadi8 (regPlusImm I32:$addr, imm:$off))), (LOAD8_U_I32 imm:$off, $addr)>; -def : Pat<(i32 (sextloadi16 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i32 (sextloadi16 (regPlusImm I32:$addr, imm:$off))), (LOAD16_S_I32 imm:$off, $addr)>; -def : Pat<(i32 (zextloadi16 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i32 (zextloadi16 (regPlusImm I32:$addr, imm:$off))), (LOAD16_U_I32 imm:$off, $addr)>; -def : Pat<(i64 (sextloadi8 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (sextloadi8 (regPlusImm I32:$addr, imm:$off))), (LOAD8_S_I64 imm:$off, $addr)>; -def : Pat<(i64 (zextloadi8 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (zextloadi8 (regPlusImm I32:$addr, imm:$off))), (LOAD8_U_I64 imm:$off, $addr)>; -def : Pat<(i64 (sextloadi16 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (sextloadi16 (regPlusImm I32:$addr, imm:$off))), (LOAD16_S_I64 imm:$off, $addr)>; -def : Pat<(i64 (zextloadi16 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (zextloadi16 (regPlusImm I32:$addr, imm:$off))), (LOAD16_U_I64 imm:$off, $addr)>; -def : Pat<(i64 (sextloadi32 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (sextloadi32 (regPlusImm I32:$addr, imm:$off))), (LOAD32_S_I64 imm:$off, $addr)>; -def : Pat<(i64 (zextloadi32 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (zextloadi32 (regPlusImm I32:$addr, imm:$off))), (LOAD32_U_I64 imm:$off, $addr)>; -def : Pat<(i32 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i32 (sextloadi8 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD8_S_I32 tglobaladdr:$off, $addr)>; -def : Pat<(i32 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i32 (zextloadi8 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD8_U_I32 tglobaladdr:$off, $addr)>; -def : Pat<(i32 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i32 (sextloadi16 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD16_S_I32 tglobaladdr:$off, $addr)>; -def : Pat<(i32 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i32 (zextloadi16 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD16_U_I32 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (sextloadi8 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD8_S_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (zextloadi8 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD8_U_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (sextloadi16 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD16_S_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (zextloadi16 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD16_U_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (sextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (sextloadi32 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD32_S_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (zextloadi32 (regPlusImm tglobaladdr:$off, 
I32:$addr))), +def : Pat<(i64 (zextloadi32 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD32_U_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i32 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i32 (sextloadi8 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD8_S_I32 texternalsym:$off, $addr)>; -def : Pat<(i32 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i32 (zextloadi8 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD8_U_I32 texternalsym:$off, $addr)>; -def : Pat<(i32 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i32 (sextloadi16 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD16_S_I32 texternalsym:$off, $addr)>; -def : Pat<(i32 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i32 (zextloadi16 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD16_U_I32 texternalsym:$off, $addr)>; -def : Pat<(i64 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (sextloadi8 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD8_S_I64 texternalsym:$off, $addr)>; -def : Pat<(i64 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (zextloadi8 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD8_U_I64 texternalsym:$off, $addr)>; -def : Pat<(i64 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (sextloadi16 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD16_S_I64 texternalsym:$off, $addr)>; -def : Pat<(i64 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (zextloadi16 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD16_U_I64 texternalsym:$off, $addr)>; -def : Pat<(i64 (sextloadi32 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (sextloadi32 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD32_S_I64 texternalsym:$off, $addr)>; -def : Pat<(i64 (zextloadi32 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (zextloadi32 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD32_U_I64 texternalsym:$off, $addr)>; // Select extending loads with just a constant offset. @@ -259,35 +298,45 @@ def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; // Select "don't care" extending loads with a constant offset. 
-def : Pat<(i32 (extloadi8 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i32 (extloadi8 (regPlusImm I32:$addr, imm:$off))), (LOAD8_U_I32 imm:$off, $addr)>; -def : Pat<(i32 (extloadi16 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i32 (extloadi16 (regPlusImm I32:$addr, imm:$off))), (LOAD16_U_I32 imm:$off, $addr)>; -def : Pat<(i64 (extloadi8 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (extloadi8 (regPlusImm I32:$addr, imm:$off))), (LOAD8_U_I64 imm:$off, $addr)>; -def : Pat<(i64 (extloadi16 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (extloadi16 (regPlusImm I32:$addr, imm:$off))), (LOAD16_U_I64 imm:$off, $addr)>; -def : Pat<(i64 (extloadi32 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (extloadi32 (regPlusImm I32:$addr, imm:$off))), (LOAD32_U_I64 imm:$off, $addr)>; -def : Pat<(i32 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i32 (extloadi8 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD8_U_I32 tglobaladdr:$off, $addr)>; -def : Pat<(i32 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i32 (extloadi16 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD16_U_I32 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (extloadi8 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD8_U_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (extloadi16 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD16_U_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (extloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (extloadi32 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD32_U_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i32 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i32 (extloadi8 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD8_U_I32 texternalsym:$off, $addr)>; -def : Pat<(i32 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i32 (extloadi16 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD16_U_I32 texternalsym:$off, $addr)>; -def : Pat<(i64 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (extloadi8 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD8_U_I64 texternalsym:$off, $addr)>; -def : Pat<(i64 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (extloadi16 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD16_U_I64 texternalsym:$off, $addr)>; -def : Pat<(i64 (extloadi32 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (extloadi32 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD32_U_I64 texternalsym:$off, $addr)>; // Select "don't care" extending loads with just a constant offset. @@ -343,29 +392,37 @@ def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>; def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>; // Select stores with a constant offset. 
-def : Pat<(store I32:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(store I32:$val, (regPlusImm I32:$addr, imm:$off)), (STORE_I32 imm:$off, I32:$addr, I32:$val)>; -def : Pat<(store I64:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(store I64:$val, (regPlusImm I32:$addr, imm:$off)), (STORE_I64 imm:$off, I32:$addr, I64:$val)>; -def : Pat<(store F32:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(store F32:$val, (regPlusImm I32:$addr, imm:$off)), (STORE_F32 imm:$off, I32:$addr, F32:$val)>; -def : Pat<(store F64:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(store F64:$val, (regPlusImm I32:$addr, imm:$off)), (STORE_F64 imm:$off, I32:$addr, F64:$val)>; -def : Pat<(store I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(store I32:$val, (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; -def : Pat<(store I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(store I64:$val, (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; -def : Pat<(store F32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(store F32:$val, (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE_F32 tglobaladdr:$off, I32:$addr, F32:$val)>; -def : Pat<(store F64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(store F64:$val, (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE_F64 tglobaladdr:$off, I32:$addr, F64:$val)>; -def : Pat<(store I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(store I32:$val, (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE_I32 texternalsym:$off, I32:$addr, I32:$val)>; -def : Pat<(store I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(store I64:$val, (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE_I64 texternalsym:$off, I32:$addr, I64:$val)>; -def : Pat<(store F32:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(store F32:$val, (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE_F32 texternalsym:$off, I32:$addr, F32:$val)>; -def : Pat<(store F64:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(store F64:$val, (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE_F64 texternalsym:$off, I32:$addr, F64:$val)>; // Select stores with just a constant offset. @@ -423,35 +480,54 @@ def : Pat<(truncstorei32 I64:$val, I32:$addr), (STORE32_I64 0, I32:$addr, I64:$val)>; // Select truncating stores with a constant offset. 
-def : Pat<(truncstorei8 I32:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(truncstorei8 I32:$val, (regPlusImm I32:$addr, imm:$off)), (STORE8_I32 imm:$off, I32:$addr, I32:$val)>; -def : Pat<(truncstorei16 I32:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(truncstorei16 I32:$val, (regPlusImm I32:$addr, imm:$off)), (STORE16_I32 imm:$off, I32:$addr, I32:$val)>; -def : Pat<(truncstorei8 I64:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(truncstorei8 I64:$val, (regPlusImm I32:$addr, imm:$off)), (STORE8_I64 imm:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei16 I64:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(truncstorei16 I64:$val, (regPlusImm I32:$addr, imm:$off)), (STORE16_I64 imm:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei32 I64:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(truncstorei32 I64:$val, (regPlusImm I32:$addr, imm:$off)), (STORE32_I64 imm:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei8 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(truncstorei8 I32:$val, + (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE8_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; -def : Pat<(truncstorei16 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(truncstorei16 I32:$val, + (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE16_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; -def : Pat<(truncstorei8 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(truncstorei8 I64:$val, + (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE8_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei16 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(truncstorei16 I64:$val, + (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE16_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei32 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(truncstorei32 I64:$val, + (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE32_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei8 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(truncstorei8 I32:$val, (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE8_I32 texternalsym:$off, I32:$addr, I32:$val)>; -def : Pat<(truncstorei16 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(truncstorei16 I32:$val, + (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE16_I32 texternalsym:$off, I32:$addr, I32:$val)>; -def : Pat<(truncstorei8 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(truncstorei8 I64:$val, + (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE8_I64 texternalsym:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei16 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(truncstorei16 I64:$val, + (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE16_I64 texternalsym:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei32 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(truncstorei32 I64:$val, + (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE32_I64 texternalsym:$off, I32:$addr, I64:$val)>; // Select truncating stores with just a constant offset. 
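The regPlusImm and regPlusGA PatFrags used throughout the patterns above hinge on WebAssembly's addressing rule: the constant offset field and the address operand are added as unsigned values with infinite precision, so only adds known not to wrap (or global-address offsets known to be non-negative) may be folded. A minimal sketch of why, with invented values rather than anything from this patch:

    #include <cassert>
    #include <cstdint>

    // WebAssembly semantics: effective address = zext(addr) + zext(offset),
    // computed without 32-bit wraparound.
    uint64_t effectiveAddress(uint32_t Addr, uint32_t Offset) {
      return uint64_t(Addr) + uint64_t(Offset);
    }

    int main() {
      uint32_t Addr = 0xFFFFFFF0u;
      uint32_t Imm = 0x20u;
      // The program's i32.add wraps to 0x10, but a folded offset field does
      // not wrap, so folding an add that lacks the NoUnsignedWrap flag would
      // change which address is accessed.
      uint32_t Wrapped = Addr + Imm;                  // 0x00000010
      uint64_t Folded = effectiveAddress(Addr, Imm);  // 0x100000010
      assert(uint64_t(Wrapped) != Folded);
      return 0;
    }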
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index a953f82..022a448 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -36,15 +36,17 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol( return Printer.GetExternalSymbolSymbol(MO.getSymbolName()); } -MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(const MachineOperand &MO, - MCSymbol *Sym) const { - assert(MO.getTargetFlags() == 0 && "WebAssembly does not use target flags"); +MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym, + int64_t Offset, + bool IsFunc) const { + MCSymbolRefExpr::VariantKind VK = + IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION + : MCSymbolRefExpr::VK_None; + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx); - const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); - - int64_t Offset = MO.getOffset(); if (Offset != 0) { - assert(!MO.isJTI() && "Unexpected offset with jump table index"); + if (IsFunc) + report_fatal_error("Function addresses with offsets not supported"); Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx); } @@ -64,6 +66,9 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI, default: MI->dump(); llvm_unreachable("unknown operand type"); + case MachineOperand::MO_MachineBasicBlock: + MI->dump(); + llvm_unreachable("MachineBasicBlock operand should have been rewritten"); case MachineOperand::MO_Register: { // Ignore all implicit register operands. if (MO.isImplicit()) @@ -89,15 +94,19 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI, llvm_unreachable("unknown floating point immediate type"); break; } - case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr( - MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx)); - break; case MachineOperand::MO_GlobalAddress: - MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + assert(MO.getTargetFlags() == 0 && + "WebAssembly does not use target flags on GlobalAddresses"); + MCOp = LowerSymbolOperand(GetGlobalAddressSymbol(MO), MO.getOffset(), + MO.getGlobal()->getValueType()->isFunctionTy()); break; case MachineOperand::MO_ExternalSymbol: - MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); + // The target flag indicates whether this is a symbol for a + // variable or a function. 
+ assert((MO.getTargetFlags() & -2) == 0 && + "WebAssembly uses only one target flag bit on ExternalSymbols"); + MCOp = LowerSymbolOperand(GetExternalSymbolSymbol(MO), /*Offset=*/0, + MO.getTargetFlags() & 1); break; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h index 6d70470..ab4ba1c 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h @@ -31,9 +31,10 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower { MCContext &Ctx; AsmPrinter &Printer; - MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; + MCOperand LowerSymbolOperand(MCSymbol *Sym, int64_t Offset, + bool IsFunc) const; public: WebAssemblyMCInstLower(MCContext &ctx, AsmPrinter &printer) diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 89ef5cd..537c147 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -147,8 +147,10 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { // block boundaries, and the blocks aren't ordered so the block visitation // order isn't significant, but we may want to change this in the future. for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : reverse(MBB)) { - MachineInstr *Insert = &MI; + // Don't use a range-based for loop, because we modify the list as we're + // iterating over it and the end iterator may change. + for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) { + MachineInstr *Insert = &*MII; // Don't nest anything inside a phi. if (Insert->getOpcode() == TargetOpcode::PHI) break; @@ -221,7 +223,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { Insert = Def; } if (AnyStackified) - ImposeStackOrdering(&MI); + ImposeStackOrdering(&*MII); } } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index dcada45..90d8dda 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -61,17 +61,23 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( MachineFunction &MF = *MBB.getParent(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); const MachineFrameInfo& MFI = *MF.getFrameInfo(); - int FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex); + int64_t FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex); if (MI.mayLoadOrStore()) { // If this is a load or store, make it relative to SP and fold the frame - // offset directly in - assert(MI.getOperand(1).getImm() == 0 && - "Can't eliminate FI yet if offset is already set"); - MI.getOperand(1).setImm(FrameOffset); + // offset directly in. + assert(FrameOffset >= 0 && MI.getOperand(1).getImm() >= 0); + int64_t Offset = MI.getOperand(1).getImm() + FrameOffset; + + if (static_cast<uint64_t>(Offset) > std::numeric_limits<uint32_t>::max()) { + // If this happens the program is invalid, but better to error here than + // generate broken code. 
+ report_fatal_error("Memory offset field overflow"); + } + MI.getOperand(1).setImm(Offset); MI.getOperand(2).ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); } else { - // Otherwise create an i32.add SP, offset and make it the operand + // Otherwise create an i32.add SP, offset and make it the operand. auto &MRI = MF.getRegInfo(); const auto *TII = MF.getSubtarget().getInstrInfo(); diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index e31ea46..b290b4b 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -45,8 +45,9 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine( const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT.isArch64Bit() ? "e-p:64:64-i64:64-n32:64-S128" - : "e-p:32:32-i64:64-n32:64-S128", + : LLVMTargetMachine(T, + TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128" + : "e-m:e-p:32:32-i64:64-n32:64-S128", TT, CPU, FS, Options, RM, CM, OL), TLOF(make_unique<WebAssemblyTargetObjectFile>()) { // WebAssembly type-checks expressions, but a noreturn function with a return diff --git a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt index 92ecde3..91b3fff 100644 --- a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt +++ b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -5,23 +5,6 @@ pr38151.c va-arg-22.c -# WebAssemblyRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator, int, unsigned int, llvm::RegScavenger *) const: Assertion `MI.getOperand(1).getImm() == 0 && "Can't eliminate FI yet if offset is already set"' -20030313-1.c -20030916-1.c -20031012-1.c -20041126-1.c -20060420-1.c -20071202-1.c -20120808-1.c -pr20527-1.c -pr27073.c -pr36339.c -pr37573.c -pr43236.c -pr43835.c -pr45070.c -pr51933.c - # TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed. struct-ret-1.c va-arg-11.c @@ -140,8 +123,6 @@ pr38051.c pr39100.c pr39339.c -pr40022.c -pr40657.c pr43987.c diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h index fbec662..01e65b8 100644 --- a/contrib/llvm/lib/Target/X86/X86.h +++ b/contrib/llvm/lib/Target/X86/X86.h @@ -29,7 +29,7 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel); /// This pass initializes a global base register for PIC on x86-32. -FunctionPass* createX86GlobalBaseRegPass(); +FunctionPass *createX86GlobalBaseRegPass(); /// This pass combines multiple accesses to local-dynamic TLS variables so that /// the TLS base address for the module is only fetched once per execution path @@ -49,12 +49,13 @@ FunctionPass *createX86IssueVZeroUpperPass(); /// This will prevent a stall when returning on the Atom. FunctionPass *createX86PadShortFunctions(); -/// Return a a pass that selectively replaces certain instructions (like add, +/// Return a pass that selectively replaces certain instructions (like add, /// sub, inc, dec, some shifts, and some multiplies) by equivalent LEA /// instructions, in order to eliminate execution delays in some processors. 
FunctionPass *createX86FixupLEAs(); -/// Return a pass that removes redundant address recalculations. +/// Return a pass that removes redundant LEA instructions and redundant address +/// recalculations. FunctionPass *createX86OptimizeLEAs(); /// Return a pass that optimizes the code-size of x86 call sequences. This is diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td index 54d88cb..e8b96e7 100644 --- a/contrib/llvm/lib/Target/X86/X86CallingConv.td +++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td @@ -831,6 +831,12 @@ def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15, def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI, R8, R9, R10, R11)>; +// CSRs that are handled by prologue, epilogue. +def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add)>; + +// CSRs that are handled explicitly via copies. +def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(add CSR_64_TLS_Darwin)>; + // All GPRs - except r11 def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI, R8, R9, R10, RSP)>; diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp index 629d4d3..f48b479 100644 --- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp @@ -1002,6 +1002,9 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; + if (TLI.supportSplitCSR(FuncInfo.MF)) + return false; + CallingConv::ID CC = F.getCallingConv(); if (CC != CallingConv::C && CC != CallingConv::Fast && diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index d31aab0..1ec93b5 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -265,7 +265,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Without SSE, i64->f64 goes through memory. setOperationAction(ISD::BITCAST , MVT::i64 , Expand); } - } + } else if (!Subtarget->is64Bit()) + setOperationAction(ISD::BITCAST , MVT::i64 , Custom); // Scalar integer divide and remainder are lowered to use operations that // produce two results, to match the available instructions. This exposes @@ -2310,6 +2311,18 @@ X86TargetLowering::LowerReturn(SDValue Chain, DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); } + const X86RegisterInfo *TRI = Subtarget->getRegisterInfo(); + const MCPhysReg *I = + TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); + if (I) { + for (; *I; ++I) { + if (X86::GR64RegClass.contains(*I)) + RetOps.push_back(DAG.getRegister(*I, MVT::i64)); + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + } + } + RetOps[0] = Chain; // Update chain. // Add the flag if we have it. 
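In the LowerReturn hunk above, getCalleeSavedRegsViaCopy follows the usual LLVM convention for callee-saved register lists: it returns either null (when the split-CSR convention does not apply) or a plain array of register numbers terminated by 0, hence the "if (I)" guard and the "for (; *I; ++I)" walk. A minimal stand-in sketch of that idiom; the type alias and register numbers here are invented for illustration:

    #include <cstdint>
    #include <cstdio>

    using MCPhysRegStub = uint16_t; // stand-in for llvm::MCPhysReg

    // Hypothetical zero-terminated list in the style of the
    // CSR_64_CXX_TLS_Darwin_ViaCopy table; the register numbers are made up.
    static const MCPhysRegStub ViaCopyList[] = {48, 49, 50, 0};

    int main() {
      // Walk until the terminating 0, exactly as the hunk above does.
      for (const MCPhysRegStub *I = ViaCopyList; *I; ++I)
        std::printf("return preserves reg %u via explicit copy\n",
                    unsigned(*I));
      return 0;
    }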
@@ -3907,6 +3920,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::SHUFP: + case X86ISD::INSERTPS: case X86ISD::PALIGNR: case X86ISD::MOVLHPS: case X86ISD::MOVLHPD: @@ -4157,6 +4171,35 @@ static bool hasFPCMov(unsigned X86CC) { } } + +bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, + unsigned Intrinsic) const { + + const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic); + if (!IntrData) + return false; + + switch (IntrData->Type) { + case LOADA: + case LOADU: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(I.getType()); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = (IntrData->Type == LOADA ? Info.memVT.getSizeInBits()/8 : 1); + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + return true; + } + default: + break; + } + + return false; +} + /// Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. @@ -4743,8 +4786,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, /// uses one source. Note that this will set IsUnary for shuffles which use a /// single input multiple times, and in those cases it will /// adjust the mask to only have indices within that single input. -/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero. -static bool getTargetShuffleMask(SDNode *N, MVT VT, +static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, SmallVectorImpl<int> &Mask, bool &IsUnary) { unsigned NumElems = VT.getVectorNumElements(); SDValue ImmN; @@ -4761,6 +4803,11 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; + case X86ISD::INSERTPS: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); + break; case X86ISD::UNPCKH: DecodeUNPCKHMask(VT, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); @@ -4870,10 +4917,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, case X86ISD::VPERM2X128: ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); - // Mask only contains negative index if an element is zero. - if (std::any_of(Mask.begin(), Mask.end(), - [](int M){ return M == SM_SentinelZero; })) - return false; + IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::MOVSLDUP: DecodeMOVSLDUPMask(VT, Mask); @@ -5008,6 +5052,12 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, if (Mask.empty()) return false; + // Check if we're getting a shuffle mask with zero'd elements. + if (!AllowSentinelZero) + if (std::any_of(Mask.begin(), Mask.end(), + [](int M){ return M == SM_SentinelZero; })) + return false; + // If we have a fake unary shuffle, the shuffle mask is spread across two // inputs that are actually the same node. Re-map the mask to always point // into the first input. @@ -5046,19 +5096,19 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, // Recurse into target specific vector shuffles to find scalars. 
if (isTargetShuffle(Opcode)) { MVT ShufVT = V.getSimpleValueType(); - unsigned NumElems = ShufVT.getVectorNumElements(); + int NumElems = (int)ShufVT.getVectorNumElements(); SmallVector<int, 16> ShuffleMask; bool IsUnary; - if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary)) + if (!getTargetShuffleMask(N, ShufVT, false, ShuffleMask, IsUnary)) return SDValue(); int Elt = ShuffleMask[Index]; - if (Elt < 0) + if (Elt == SM_SentinelUndef) return DAG.getUNDEF(ShufVT.getVectorElementType()); - SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0) - : N->getOperand(1); + assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range"); + SDValue NewV = (Elt < NumElems) ? N->getOperand(0) : N->getOperand(1); return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1); } @@ -8165,6 +8215,13 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG)) return TruncBroadcast; + MVT BroadcastVT = VT; + + // Peek through any bitcast (only useful for loads). + SDValue BC = V; + while (BC.getOpcode() == ISD::BITCAST) + BC = BC.getOperand(0); + // Also check the simpler case, where we can directly reuse the scalar. if (V.getOpcode() == ISD::BUILD_VECTOR || (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) { @@ -8174,13 +8231,17 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, // Only AVX2 has register broadcasts. if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V)) return SDValue(); - } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) { + } else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) { + // 32-bit targets need to load i64 as a f64 and then bitcast the result. + if (!Subtarget->is64Bit() && VT.getScalarType() == MVT::i64) + BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements()); + // If we are broadcasting a load that is only used by the shuffle // then we can reduce the vector load to the broadcasted scalar load. - LoadSDNode *Ld = cast<LoadSDNode>(V); + LoadSDNode *Ld = cast<LoadSDNode>(BC); SDValue BaseAddr = Ld->getOperand(1); EVT AddrVT = BaseAddr.getValueType(); - EVT SVT = VT.getScalarType(); + EVT SVT = BroadcastVT.getScalarType(); unsigned Offset = BroadcastIdx * SVT.getStoreSize(); SDValue NewAddr = DAG.getNode( ISD::ADD, DL, AddrVT, BaseAddr, @@ -8194,7 +8255,8 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, return SDValue(); } - return DAG.getNode(X86ISD::VBROADCAST, DL, VT, V); + V = DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, V); + return DAG.getBitcast(VT, V); } // Check for whether we can use INSERTPS to perform the shuffle. We only use @@ -12474,8 +12536,12 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // location. SDValue Chain = DAG.getEntryNode(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, DL, true), DL); SDValue Args[] = { Chain, Offset }; Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args); + Chain = + DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true), + DAG.getIntPtrConstant(0, DL, true), SDValue(), DL); // TLSCALL will be codegen'ed as call. Inform MFI that function has calls. 
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); @@ -12648,13 +12714,21 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, return Op; } + SDValue ValueToStore = Op.getOperand(0); + if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) && + !Subtarget->is64Bit()) + // Bitcasting to f64 here allows us to do a single 64-bit store from + // an SSE register, avoiding the store forwarding penalty that would come + // with two 32-bit stores. + ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); + unsigned Size = SrcVT.getSizeInBits()/8; MachineFunction &MF = DAG.getMachineFunction(); auto PtrVT = getPointerTy(MF.getDataLayout()); int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Chain = DAG.getStore( - DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, + DAG.getEntryNode(), dl, ValueToStore, StackSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), false, false, 0); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); @@ -13027,7 +13101,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, } assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), + SDValue ValueToStore = Op.getOperand(0); + if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget->is64Bit()) + // Bitcasting to f64 here allows us to do a single 64-bit store from + // an SSE register, avoiding the store forwarding penalty that would come + // with two 32-bit stores. + ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore, StackSlot, MachinePointerInfo(), false, false, 0); // For i64 source, we need to add the appropriate power of 2 if the input @@ -17487,7 +17567,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, return DAG.getMergeValues(Results, dl); } case COMPRESS_TO_MEM: { - SDLoc dl(Op); SDValue Mask = Op.getOperand(4); SDValue DataToCompress = Op.getOperand(3); SDValue Addr = Op.getOperand(2); @@ -17513,7 +17592,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, case TRUNCATE_TO_MEM_VI32: return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i32); case EXPAND_FROM_MEM: { - SDLoc dl(Op); SDValue Mask = Op.getOperand(4); SDValue PassThru = Op.getOperand(3); SDValue Addr = Op.getOperand(2); @@ -17533,6 +17611,25 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, Mask, PassThru, Subtarget, DAG), Chain}; return DAG.getMergeValues(Results, dl); } + case LOADU: + case LOADA: { + SDValue Mask = Op.getOperand(4); + SDValue PassThru = Op.getOperand(3); + SDValue Addr = Op.getOperand(2); + SDValue Chain = Op.getOperand(0); + MVT VT = Op.getSimpleValueType(); + + MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op); + assert(MemIntr && "Expected MemIntrinsicSDNode!"); + + if (isAllOnesConstant(Mask)) // return just a load + return DAG.getLoad(VT, dl, Chain, Addr, MemIntr->getMemOperand()); + + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + return DAG.getMaskedLoad(VT, dl, Chain, Addr, VMask, PassThru, VT, + MemIntr->getMemOperand(), ISD::NON_EXTLOAD); + } } } @@ -19512,24 +19609,37 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, MVT SrcVT = Op.getOperand(0).getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); - if (SrcVT 
== MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) { + if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 || + SrcVT == MVT::i64) { assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); if (DstVT != MVT::f64) // This conversion needs to be expanded. return SDValue(); - SDValue InVec = Op->getOperand(0); - SDLoc dl(Op); - unsigned NumElts = SrcVT.getVectorNumElements(); - MVT SVT = SrcVT.getVectorElementType(); - - // Widen the vector in input in the case of MVT::v2i32. - // Example: from MVT::v2i32 to MVT::v4i32. + SDValue Op0 = Op->getOperand(0); SmallVector<SDValue, 16> Elts; - for (unsigned i = 0, e = NumElts; i != e; ++i) - Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec, - DAG.getIntPtrConstant(i, dl))); - + SDLoc dl(Op); + unsigned NumElts; + MVT SVT; + if (SrcVT.isVector()) { + NumElts = SrcVT.getVectorNumElements(); + SVT = SrcVT.getVectorElementType(); + + // Widen the vector in input in the case of MVT::v2i32. + // Example: from MVT::v2i32 to MVT::v4i32. + for (unsigned i = 0, e = NumElts; i != e; ++i) + Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Op0, + DAG.getIntPtrConstant(i, dl))); + } else { + assert(SrcVT == MVT::i64 && !Subtarget->is64Bit() && + "Unexpected source type in LowerBITCAST"); + Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0, + DAG.getIntPtrConstant(0, dl))); + Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0, + DAG.getIntPtrConstant(1, dl))); + NumElts = 2; + SVT = MVT::i32; + } // Explicitly mark the extra elements as Undef. Elts.append(NumElts, DAG.getUNDEF(SVT)); @@ -20685,6 +20795,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VSHLI: return "X86ISD::VSHLI"; case X86ISD::VSRLI: return "X86ISD::VSRLI"; case X86ISD::VSRAI: return "X86ISD::VSRAI"; + case X86ISD::VROTLI: return "X86ISD::VROTLI"; + case X86ISD::VROTRI: return "X86ISD::VROTRI"; case X86ISD::CMPP: return "X86ISD::CMPP"; case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ"; case X86ISD::PCMPGT: return "X86ISD::PCMPGT"; @@ -23184,7 +23296,7 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, return false; SmallVector<int, 16> OpMask; bool IsUnary; - bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, OpMask, IsUnary); + bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, true, OpMask, IsUnary); // We only can combine unary shuffles which we can decode the mask for. if (!HaveMask || !IsUnary) return false; @@ -23281,7 +23393,7 @@ static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) { MVT VT = N.getSimpleValueType(); SmallVector<int, 4> Mask; bool IsUnary; - bool HaveMask = getTargetShuffleMask(N.getNode(), VT, Mask, IsUnary); + bool HaveMask = getTargetShuffleMask(N.getNode(), VT, false, Mask, IsUnary); (void)HaveMask; assert(HaveMask); @@ -23854,6 +23966,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, SDValue InVec = N->getOperand(0); SDValue EltNo = N->getOperand(1); + EVT EltVT = N->getValueType(0); if (!isa<ConstantSDNode>(EltNo)) return SDValue(); @@ -23882,14 +23995,22 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, SmallVector<int, 16> ShuffleMask; bool UnaryShuffle; - if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), + if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), true, ShuffleMask, UnaryShuffle)) return SDValue(); // Select the input vector, guarding against out of range extract vector. 
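The sentinel handling that follows is easier to read with the shuffle-mask convention spelled out: a resolved mask entry is either a source index in [0, 2*NumElems) spanning both shuffle inputs, or a negative sentinel meaning "undef" or "known zero". A minimal stand-alone sketch of that selection logic in plain C++ (not the SelectionDAG API; SM_SentinelUndef/SM_SentinelZero here only mirror LLVM's convention):

    #include <cassert>
    #include <optional>
    #include <vector>

    constexpr int SM_SentinelUndef = -1; // lane is undefined
    constexpr int SM_SentinelZero = -2;  // lane is known to be zero

    // Resolve "extract element Elt of shuffle(V0, V1, Mask)" to a value;
    // nullopt models folding the extract to undef.
    std::optional<int> extractThroughShuffle(const std::vector<int> &V0,
                                             const std::vector<int> &V1,
                                             const std::vector<int> &Mask,
                                             unsigned Elt) {
      int Idx = Elt >= Mask.size() ? SM_SentinelUndef : Mask[Elt];
      if (Idx == SM_SentinelZero)
        return 0;                  // fold to a constant zero
      if (Idx == SM_SentinelUndef)
        return std::nullopt;       // fold to undef
      const int NumElems = (int)V0.size();
      assert(Idx >= 0 && Idx < 2 * NumElems && "shuffle index out of range");
      // Select the first or second shuffle input, as the code below does.
      return Idx < NumElems ? V0[Idx] : V1[Idx - NumElems];
    }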
unsigned NumElems = CurrentVT.getVectorNumElements(); int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt]; + int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt]; + + if (Idx == SM_SentinelZero) + return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT) + : DAG.getConstantFP(+0.0, SDLoc(N), EltVT); + if (Idx == SM_SentinelUndef) + return DAG.getUNDEF(EltVT); + + assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range"); SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); @@ -23914,7 +24035,6 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile()) return SDValue(); - EVT EltVT = N->getValueType(0); // If there's a bitcast before the shuffle, check if the load type and // alignment is valid. unsigned Align = LN0->getAlignment(); @@ -27233,6 +27353,32 @@ static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG, return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewSext, NewConstant, &Flags); } +/// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y)) -> +/// (i8,i32 ({s/u}divrem_sext_hreg (i8 x, i8 y) +/// This exposes the {s/z}ext to the sdivrem lowering, so that it directly +/// extends from AH (which we otherwise need to do contortions to access). +static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + auto OpcodeN = N->getOpcode(); + auto OpcodeN0 = N0.getOpcode(); + if (!((OpcodeN == ISD::SIGN_EXTEND && OpcodeN0 == ISD::SDIVREM) || + (OpcodeN == ISD::ZERO_EXTEND && OpcodeN0 == ISD::UDIVREM))) + return SDValue(); + + EVT VT = N->getValueType(0); + EVT InVT = N0.getValueType(); + if (N0.getResNo() != 1 || InVT != MVT::i8 || VT != MVT::i32) + return SDValue(); + + SDVTList NodeTys = DAG.getVTList(MVT::i8, VT); + auto DivRemOpcode = OpcodeN0 == ISD::SDIVREM ? X86ISD::SDIVREM8_SEXT_HREG + : X86ISD::UDIVREM8_ZEXT_HREG; + SDValue R = DAG.getNode(DivRemOpcode, SDLoc(N), NodeTys, N0.getOperand(0), + N0.getOperand(1)); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0)); + return R.getValue(1); +} + static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -27243,18 +27389,8 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, EVT InSVT = InVT.getScalarType(); SDLoc DL(N); - // (i8,i32 sext (sdivrem (i8 x, i8 y)) -> - // (i8,i32 (sdivrem_sext_hreg (i8 x, i8 y) - // This exposes the sext to the sdivrem lowering, so that it directly extends - // from AH (which we otherwise need to do contortions to access). 
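For reference, the semantics the [SU]DIVREM8_[SZ]EXT_HREG nodes capture: the x86 8-bit divide leaves the quotient in AL and the remainder in AH, and folding the extension into the node lets the remainder be sign- or zero-extended directly out of AH instead of being shuffled through another register first. A small host-side illustration of the values involved (plain C++, no LLVM types; C's truncating division matches IDIV here):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t X = -7, Y = 3;
      int8_t Quot = X / Y;       // would land in AL after IDIV: -2
      int8_t Rem = X % Y;        // would land in AH after IDIV: -1
      int32_t RemSExt = Rem;     // the sext that SDIVREM8_SEXT_HREG folds in
      std::printf("quot=%d rem=%d rem_sext=%d\n", Quot, Rem, RemSExt);
      return 0;
    }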
- if (N0.getOpcode() == ISD::SDIVREM && N0.getResNo() == 1 && - InVT == MVT::i8 && VT == MVT::i32) { - SDVTList NodeTys = DAG.getVTList(MVT::i8, VT); - SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, DL, NodeTys, - N0.getOperand(0), N0.getOperand(1)); - DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0)); - return R.getValue(1); - } + if (SDValue DivRem8 = getDivRem8(N, DAG)) + return DivRem8; if (!DCI.isBeforeLegalizeOps()) { if (InVT == MVT::i1) { @@ -27413,19 +27549,8 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget)) return R; - // (i8,i32 zext (udivrem (i8 x, i8 y)) -> - // (i8,i32 (udivrem_zext_hreg (i8 x, i8 y) - // This exposes the zext to the udivrem lowering, so that it directly extends - // from AH (which we otherwise need to do contortions to access). - if (N0.getOpcode() == ISD::UDIVREM && - N0.getResNo() == 1 && N0.getValueType() == MVT::i8 && - VT == MVT::i32) { - SDVTList NodeTys = DAG.getVTList(MVT::i8, VT); - SDValue R = DAG.getNode(X86ISD::UDIVREM8_ZEXT_HREG, dl, NodeTys, - N0.getOperand(0), N0.getOperand(1)); - DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0)); - return R.getValue(1); - } + if (SDValue DivRem8 = getDivRem8(N, DAG)) + return DivRem8; return SDValue(); } @@ -27923,7 +28048,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); -// TODO: refactor the [SU]DIVREM8_[SZ]EXT_HREG code so that it's not duplicated. case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget); @@ -28763,3 +28887,51 @@ bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const { Attribute::MinSize); return OptSize && !VT.isVector(); } + +void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { + if (!Subtarget->is64Bit()) + return; + + // Update IsSplitCSR in X86MachineFunctionInfo. + X86MachineFunctionInfo *AFI = + Entry->getParent()->getInfo<X86MachineFunctionInfo>(); + AFI->setIsSplitCSR(true); +} + +void X86TargetLowering::insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const { + const X86RegisterInfo *TRI = Subtarget->getRegisterInfo(); + const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); + if (!IStart) + return; + + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); + for (const MCPhysReg *I = IStart; *I; ++I) { + const TargetRegisterClass *RC = nullptr; + if (X86::GR64RegClass.contains(*I)) + RC = &X86::GR64RegClass; + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + + unsigned NewVR = MRI->createVirtualRegister(RC); + // Create copy from CSR to a virtual register. + // FIXME: this currently does not emit CFI pseudo-instructions, it works + // fine for CXX_FAST_TLS since the C++-style TLS access functions should be + // nounwind. If we want to generalize this later, we may need to emit + // CFI pseudo-instructions. 
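The copies built below implement the split-CSR scheme gated on CXX_FAST_TLS: rather than the usual prologue/epilogue spills, each callee-saved GR64 is copied into a fresh virtual register at function entry and copied back before every return, leaving placement (or elision) of the saves to the register allocator. A rough conceptual sketch of the resulting shape, with hypothetical names standing in for registers (ordinary C++, not MIR or the LLVM API):

    #include <cstdint>

    struct Regs { uint64_t RBX, R12; }; // stand-ins for two callee-saved registers

    uint64_t body(Regs &R) { R.RBX = 0; R.R12 = 0; return 42; } // may clobber CSRs

    uint64_t callee(Regs &R) {
      uint64_t SavedRBX = R.RBX; // entry block: COPY %csr -> %vreg
      uint64_t SavedR12 = R.R12;
      uint64_t Ret = body(R);
      R.RBX = SavedRBX;          // every exit block: COPY %vreg -> %csr
      R.R12 = SavedR12;
      return Ret;
    }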
+ assert(Entry->getParent()->getFunction()->hasFnAttribute( + Attribute::NoUnwind) && + "Function should be nounwind in insertCopiesSplitCSR!"); + Entry->addLiveIn(*I); + BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), + NewVR) + .addReg(*I); + + for (auto *Exit : Exits) + BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), + *I) + .addReg(NewVR); + } +} diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h index 8bb0e5f..0ab786e 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h @@ -316,6 +316,9 @@ namespace llvm { // Vector shift elements by immediate VSHLI, VSRLI, VSRAI, + // Bit rotate by immediate + VROTLI, VROTRI, + // Vector packed double/float comparison. CMPP, @@ -837,6 +840,13 @@ namespace llvm { /// from i32 to i8 but not from i32 to i16. bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; + /// Given an intrinsic, checks if on the target the intrinsic will need to map + /// to a MemIntrinsicNode (touches memory). If this is the case, it returns + /// true and stores the intrinsic information into the IntrinsicInfo that was + /// passed to the function. + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + unsigned Intrinsic) const override; + /// Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. @@ -1057,6 +1067,15 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, SDLoc dl, SelectionDAG &DAG) const override; + bool supportSplitCSR(MachineFunction *MF) const override { + return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); + } + void initializeSplitCSR(MachineBasicBlock *Entry) const override; + void insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; + bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; bool mayBeEmittedAsTailCall(CallInst *CI) const override; diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td index 0a27c33..49be648 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td @@ -188,7 +188,7 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F, let isCommutable = IsCommutable in def NAME: AVX512<O, F, Outs, Ins, OpcodeStr#"\t{"#AttSrcAsm#", $dst|"# - "$dst , "#IntelSrcAsm#"}", + "$dst, "#IntelSrcAsm#"}", Pattern, itin>; // Prefer over VMOV*rrk Pat<> @@ -323,18 +323,16 @@ multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, list<dag> Pattern, - list<dag> MaskingPattern, - string Round = "", - InstrItinClass itin = NoItinerary> { + list<dag> MaskingPattern> { def NAME: AVX512<O, F, Outs, Ins, - OpcodeStr#"\t{"#AttSrcAsm#", $dst "#Round#"|"# - "$dst "#Round#", "#IntelSrcAsm#"}", - Pattern, itin>; + OpcodeStr#"\t{"#AttSrcAsm#", $dst|"# + "$dst, "#IntelSrcAsm#"}", + Pattern, NoItinerary>; def NAME#k: AVX512<O, F, Outs, MaskingIns, - OpcodeStr#"\t{"#Round#AttSrcAsm#", $dst {${mask}}|"# - "$dst {${mask}}, "#IntelSrcAsm#Round#"}", - MaskingPattern, itin>, EVEX_K; + OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"# + "$dst {${mask}}, "#IntelSrcAsm#"}", + MaskingPattern, NoItinerary>, EVEX_K; } multiclass 
AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _, @@ -342,33 +340,27 @@ multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _, dag Ins, dag MaskingIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, dag MaskingRHS, - string Round = "", - InstrItinClass itin = NoItinerary> : + dag RHS, dag MaskingRHS> : AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr, AttSrcAsm, IntelSrcAsm, [(set _.KRC:$dst, RHS)], - [(set _.KRC:$dst, MaskingRHS)], - Round, NoItinerary>; + [(set _.KRC:$dst, MaskingRHS)]>; multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, string Round = "", - InstrItinClass itin = NoItinerary> : + dag RHS> : AVX512_maskable_common_cmp<O, F, _, Outs, Ins, !con((ins _.KRCWM:$mask), Ins), OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, - (and _.KRCWM:$mask, RHS), - Round, itin>; + (and _.KRCWM:$mask, RHS)>; multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm> : AVX512_maskable_custom_cmp<O, F, Outs, Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr, - AttSrcAsm, IntelSrcAsm, - [],[],"", NoItinerary>; + AttSrcAsm, IntelSrcAsm, [],[]>; // Bitcasts between 512-bit vector types. Return the original type since // no instruction is needed for the conversion @@ -1294,7 +1286,7 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> { def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, - "\t{$src2, $src1, ${dst} |${dst}, $src1, $src2}"), + "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>, EVEX_4V; def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), @@ -1311,7 +1303,7 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> { def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, - "\t{$src2, $src1, ${dst} |${dst}, $src1, $src2}"), + "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), @@ -1426,7 +1418,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd> (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), "vcmp${cc}"#_.Suffix, - "{sae}, $src2, $src1", "$src1, $src2,{sae}", + "{sae}, $src2, $src1", "$src1, $src2, {sae}", (OpNodeRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc, @@ -1449,7 +1441,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd> (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, - "$cc,{sae}, $src2, $src1","$src1, $src2,{sae}, $cc">, + "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">, EVEX_4V, EVEX_B; }// let isAsmParserOnly = 1, hasSideEffects = 0 @@ -1831,7 +1823,7 @@ multiclass avx512_vcmp_sae<X86VectorVTInfo _> { defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), "vcmp${cc}"#_.Suffix, - "{sae}, $src2, $src1", "$src1, $src2,{sae}", + "{sae}, $src2, $src1", "$src1, $src2, {sae}", (X86cmpmRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc, @@ -1842,8 +1834,8 @@ multiclass avx512_vcmp_sae<X86VectorVTInfo _> { (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, - "$cc,{sae}, $src2, $src1", - 
"$src1, $src2,{sae}, $cc">, EVEX_B; + "$cc, {sae}, $src2, $src1", + "$src1, $src2, {sae}, $cc">, EVEX_B; } } @@ -1889,13 +1881,13 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, let Predicates = [prd] in { def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),//_.KRC:$dst), (ins _.RC:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst | $dst, $src1, $src2}", + OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1), (i32 imm:$src2)))], NoItinerary>; def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix# - "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}", + "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode (_.VT _.RC:$src1), (i32 imm:$src2))))], NoItinerary>, EVEX_K; @@ -1903,14 +1895,14 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.MemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix## - "\t{$src2, $src1, $dst | $dst, $src1, $src2}", + "\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst, (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i32 imm:$src2)))], NoItinerary>; def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix## - "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}", + "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i32 imm:$src2))))], NoItinerary>, EVEX_K; @@ -1925,13 +1917,13 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, string mem, string broadcast>{ def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst | $dst, $src1, $src2}", + OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1), (i32 imm:$src2)))], NoItinerary>; def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix# - "\t{$src2, $src1, $dst {${mask}}| $dst {${mask}}, $src1, $src2}", + "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode (_.VT _.RC:$src1), (i32 imm:$src2))))], NoItinerary>, EVEX_K; @@ -1939,21 +1931,21 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.MemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix##mem# - "\t{$src2, $src1, $dst | $dst, $src1, $src2}", + "\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i32 imm:$src2)))], NoItinerary>; def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix##mem# - "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}", + "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst, (or _.KRCWM:$mask, (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i32 imm:$src2))))], NoItinerary>, EVEX_K; def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.ScalarMemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"## - _.BroadcastStr##", $dst | $dst, ${src1}" + 
_.BroadcastStr##", $dst|$dst, ${src1}" ##_.BroadcastStr##", $src2}", [(set _.KRC:$dst,(OpNode (_.VT (X86VBroadcast @@ -1962,7 +1954,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"## - _.BroadcastStr##", $dst {${mask}} | $dst {${mask}}, ${src1}"## + _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"## _.BroadcastStr##", $src2}", [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode (_.VT (X86VBroadcast @@ -2715,30 +2707,6 @@ defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 0>, avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; -def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr, - (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)), - (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>; - -def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr, - (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)), - (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>; - -def: Pat<(v8f64 (int_x86_avx512_mask_load_pd_512 addr:$ptr, - (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)), - (VMOVAPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>; - -def: Pat<(v16f32 (int_x86_avx512_mask_load_ps_512 addr:$ptr, - (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)), - (VMOVAPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>; - -def: Pat<(v8f64 (int_x86_avx512_mask_load_pd_512 addr:$ptr, - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VMOVAPDZrm addr:$ptr)>; - -def: Pat<(v16f32 (int_x86_avx512_mask_load_ps_512 addr:$ptr, - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))), - (VMOVAPSZrm addr:$ptr)>; - def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src), GR16:$mask), (VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), @@ -4088,8 +4056,8 @@ defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>, defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>, avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V; -defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>, AVX512BIi8Base, EVEX_4V; -defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>, AVX512BIi8Base, EVEX_4V; +defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri>, AVX512BIi8Base, EVEX_4V; +defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>; defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>; @@ -6057,12 +6025,12 @@ multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode, let mayStore = 1 in { def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, SrcInfo.RC:$src), - OpcodeStr # "\t{$src, $dst |$dst, $src}", + OpcodeStr # "\t{$src, $dst|$dst, $src}", []>, EVEX; def mrk : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), - OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}", + OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>, EVEX, EVEX_K; }//mayStore = 1 } @@ -6666,12 +6634,12 @@ multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _, let mayStore = 1 in { def mr : AVX5128I<opc, MRMDestMem, (outs), (ins 
_.MemOp:$dst, _.RC:$src), - OpcodeStr # "\t{$src, $dst |$dst, $src}", + OpcodeStr # "\t{$src, $dst|$dst, $src}", []>, EVEX_CD8<_.EltSize, CD8VT1>; def mrk : AVX5128I<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), - OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}", + OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", [(store (_.VT (vselect _.KRCWM:$mask, (_.VT (X86compress _.RC:$src)), _.ImmAllZerosV)), addr:$dst)]>, @@ -6766,7 +6734,7 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix, "$src2,{sae}, $src1", + OpcodeStr##_.Suffix, "$src2, {sae}, $src1", "$src1, {sae}, $src2", (OpNode (_.VT _.RC:$src1), (i32 imm:$src2), @@ -6895,8 +6863,8 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), - OpcodeStr, "$src3,{sae}, $src2, $src1", - "$src1, $src2,{sae}, $src3", + OpcodeStr, "$src3, {sae}, $src2, $src1", + "$src1, $src2, {sae}, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$src3), @@ -6907,8 +6875,8 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), - OpcodeStr, "$src3,{sae}, $src2, $src1", - "$src1, $src2,{sae}, $src3", + OpcodeStr, "$src3, {sae}, $src2, $src1", + "$src1, $src2, {sae}, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$src3), diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td index c4b2d6d..af43d9f 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrExtension.td +++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td @@ -98,22 +98,22 @@ let hasSideEffects = 0, isCodeGenOnly = 1 in { def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX", - [], IIC_MOVZX>, TB, Sched<[WriteALU]>; + [], IIC_MOVZX>, TB, OpSize32, Sched<[WriteALU]>; let mayLoad = 1 in def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX", - [], IIC_MOVZX>, TB, Sched<[WriteALULd]>; + [], IIC_MOVZX>, TB, OpSize32, Sched<[WriteALULd]>; def MOVSX32_NOREXrr8 : I<0xBE, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movs{bl|x}\t{$src, $dst|$dst, $src} # NOREX", - [], IIC_MOVSX>, TB, Sched<[WriteALU]>; + [], IIC_MOVSX>, TB, OpSize32, Sched<[WriteALU]>; let mayLoad = 1 in def MOVSX32_NOREXrm8 : I<0xBE, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movs{bl|x}\t{$src, $dst|$dst, $src} # NOREX", - [], IIC_MOVSX>, TB, Sched<[WriteALULd]>; + [], IIC_MOVSX>, TB, OpSize32, Sched<[WriteALULd]>; } // MOVSX64rr8 always has a REX prefix and it has an 8-bit register @@ -146,18 +146,22 @@ def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), Sched<[WriteALULd]>, Requires<[In64BitMode]>; // movzbq and movzwq encodings for the disassembler -def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), - "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, - TB, Sched<[WriteALU]>; -def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), - "movz{bq|x}\t{$src, 
$dst|$dst, $src}", [], IIC_MOVZX>, - TB, Sched<[WriteALULd]>; -def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, - TB, Sched<[WriteALU]>; -def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, - TB, Sched<[WriteALULd]>; +let hasSideEffects = 0 in { +def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), + "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB, Sched<[WriteALU]>; +let mayLoad = 1 in +def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), + "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB, Sched<[WriteALULd]>; +def MOVZX64rr16 : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), + "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB, Sched<[WriteALU]>; +let mayLoad = 1 in +def MOVZX64rm16 : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), + "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB, Sched<[WriteALULd]>; +} // 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a // 32-bit register. diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 829cedd..6432863 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -225,6 +225,9 @@ def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>; def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>; def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>; +def X86vrotli : SDNode<"X86ISD::VROTLI", SDTIntShiftOp>; +def X86vrotri : SDNode<"X86ISD::VROTRI", SDTIntShiftOp>; + def X86vprot : SDNode<"X86ISD::VPROT", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index ea8e562..9c8339a 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -1273,7 +1273,7 @@ def STOSW : I<0xAB, RawFrmDst, (outs dstidx16:$dst), (ins), let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in def STOSL : I<0xAB, RawFrmDst, (outs dstidx32:$dst), (ins), "stos{l|d}\t{%eax, $dst|$dst, eax}", [], IIC_STOS>, OpSize32; -let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in +let Defs = [RDI], Uses = [RAX,RDI,EFLAGS] in def STOSQ : RI<0xAB, RawFrmDst, (outs dstidx64:$dst), (ins), "stosq\t{%rax, $dst|$dst, rax}", [], IIC_STOS>; @@ -2755,56 +2755,56 @@ def : InstAlias<"aam", (AAM8i8 10)>, Requires<[Not64BitMode]>; // Disambiguate the mem/imm form of bt-without-a-suffix as btl. // Likewise for btc/btr/bts. -def : InstAlias<"bt {$imm, $mem|$mem, $imm}", +def : InstAlias<"bt\t{$imm, $mem|$mem, $imm}", (BT32mi8 i32mem:$mem, i32i8imm:$imm), 0>; -def : InstAlias<"btc {$imm, $mem|$mem, $imm}", +def : InstAlias<"btc\t{$imm, $mem|$mem, $imm}", (BTC32mi8 i32mem:$mem, i32i8imm:$imm), 0>; -def : InstAlias<"btr {$imm, $mem|$mem, $imm}", +def : InstAlias<"btr\t{$imm, $mem|$mem, $imm}", (BTR32mi8 i32mem:$mem, i32i8imm:$imm), 0>; -def : InstAlias<"bts {$imm, $mem|$mem, $imm}", +def : InstAlias<"bts\t{$imm, $mem|$mem, $imm}", (BTS32mi8 i32mem:$mem, i32i8imm:$imm), 0>; // clr aliases. 
-def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>; -def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg), 0>; -def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg), 0>; -def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg), 0>; +def : InstAlias<"clrb\t$reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>; +def : InstAlias<"clrw\t$reg", (XOR16rr GR16:$reg, GR16:$reg), 0>; +def : InstAlias<"clrl\t$reg", (XOR32rr GR32:$reg, GR32:$reg), 0>; +def : InstAlias<"clrq\t$reg", (XOR64rr GR64:$reg, GR64:$reg), 0>; // lods aliases. Accept the destination being omitted because it's implicit // in the mnemonic, or the mnemonic suffix being omitted because it's implicit // in the destination. -def : InstAlias<"lodsb $src", (LODSB srcidx8:$src), 0>; -def : InstAlias<"lodsw $src", (LODSW srcidx16:$src), 0>; -def : InstAlias<"lods{l|d} $src", (LODSL srcidx32:$src), 0>; -def : InstAlias<"lodsq $src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>; -def : InstAlias<"lods {$src, %al|al, $src}", (LODSB srcidx8:$src), 0>; -def : InstAlias<"lods {$src, %ax|ax, $src}", (LODSW srcidx16:$src), 0>; -def : InstAlias<"lods {$src, %eax|eax, $src}", (LODSL srcidx32:$src), 0>; -def : InstAlias<"lods {$src, %rax|rax, $src}", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>; +def : InstAlias<"lodsb\t$src", (LODSB srcidx8:$src), 0>; +def : InstAlias<"lodsw\t$src", (LODSW srcidx16:$src), 0>; +def : InstAlias<"lods{l|d}\t$src", (LODSL srcidx32:$src), 0>; +def : InstAlias<"lodsq\t$src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>; +def : InstAlias<"lods\t{$src, %al|al, $src}", (LODSB srcidx8:$src), 0>; +def : InstAlias<"lods\t{$src, %ax|ax, $src}", (LODSW srcidx16:$src), 0>; +def : InstAlias<"lods\t{$src, %eax|eax, $src}", (LODSL srcidx32:$src), 0>; +def : InstAlias<"lods\t{$src, %rax|rax, $src}", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>; // stos aliases. Accept the source being omitted because it's implicit in // the mnemonic, or the mnemonic suffix being omitted because it's implicit // in the source. -def : InstAlias<"stosb $dst", (STOSB dstidx8:$dst), 0>; -def : InstAlias<"stosw $dst", (STOSW dstidx16:$dst), 0>; -def : InstAlias<"stos{l|d} $dst", (STOSL dstidx32:$dst), 0>; -def : InstAlias<"stosq $dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; -def : InstAlias<"stos {%al, $dst|$dst, al}", (STOSB dstidx8:$dst), 0>; -def : InstAlias<"stos {%ax, $dst|$dst, ax}", (STOSW dstidx16:$dst), 0>; -def : InstAlias<"stos {%eax, $dst|$dst, eax}", (STOSL dstidx32:$dst), 0>; -def : InstAlias<"stos {%rax, $dst|$dst, rax}", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"stosb\t$dst", (STOSB dstidx8:$dst), 0>; +def : InstAlias<"stosw\t$dst", (STOSW dstidx16:$dst), 0>; +def : InstAlias<"stos{l|d}\t$dst", (STOSL dstidx32:$dst), 0>; +def : InstAlias<"stosq\t$dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"stos\t{%al, $dst|$dst, al}", (STOSB dstidx8:$dst), 0>; +def : InstAlias<"stos\t{%ax, $dst|$dst, ax}", (STOSW dstidx16:$dst), 0>; +def : InstAlias<"stos\t{%eax, $dst|$dst, eax}", (STOSL dstidx32:$dst), 0>; +def : InstAlias<"stos\t{%rax, $dst|$dst, rax}", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; // scas aliases. Accept the destination being omitted because it's implicit // in the mnemonic, or the mnemonic suffix being omitted because it's implicit // in the destination. 
-def : InstAlias<"scasb $dst", (SCASB dstidx8:$dst), 0>; -def : InstAlias<"scasw $dst", (SCASW dstidx16:$dst), 0>; -def : InstAlias<"scas{l|d} $dst", (SCASL dstidx32:$dst), 0>; -def : InstAlias<"scasq $dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; -def : InstAlias<"scas {$dst, %al|al, $dst}", (SCASB dstidx8:$dst), 0>; -def : InstAlias<"scas {$dst, %ax|ax, $dst}", (SCASW dstidx16:$dst), 0>; -def : InstAlias<"scas {$dst, %eax|eax, $dst}", (SCASL dstidx32:$dst), 0>; -def : InstAlias<"scas {$dst, %rax|rax, $dst}", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"scasb\t$dst", (SCASB dstidx8:$dst), 0>; +def : InstAlias<"scasw\t$dst", (SCASW dstidx16:$dst), 0>; +def : InstAlias<"scas{l|d}\t$dst", (SCASL dstidx32:$dst), 0>; +def : InstAlias<"scasq\t$dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"scas\t{$dst, %al|al, $dst}", (SCASB dstidx8:$dst), 0>; +def : InstAlias<"scas\t{$dst, %ax|ax, $dst}", (SCASW dstidx16:$dst), 0>; +def : InstAlias<"scas\t{$dst, %eax|eax, $dst}", (SCASL dstidx32:$dst), 0>; +def : InstAlias<"scas\t{$dst, %rax|rax, $dst}", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; // div and idiv aliases for explicit A register. def : InstAlias<"div{b}\t{$src, %al|al, $src}", (DIV8r GR8 :$src)>; @@ -2892,30 +2892,30 @@ def : InstAlias<"fnstsw" , (FNSTSW16r)>; // lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but // this is compatible with what GAS does. -def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>; -def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>; -def : InstAlias<"lcall {*}$dst", (FARCALL32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>; -def : InstAlias<"ljmp {*}$dst", (FARJMP32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>; -def : InstAlias<"lcall $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>; -def : InstAlias<"ljmp $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>; -def : InstAlias<"lcall {*}$dst", (FARCALL16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>; -def : InstAlias<"ljmp {*}$dst", (FARJMP16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>; - -def : InstAlias<"call {*}$dst", (CALL64m i64mem:$dst), 0>, Requires<[In64BitMode]>; -def : InstAlias<"jmp {*}$dst", (JMP64m i64mem:$dst), 0>, Requires<[In64BitMode]>; -def : InstAlias<"call {*}$dst", (CALL32m i32mem:$dst), 0>, Requires<[In32BitMode]>; -def : InstAlias<"jmp {*}$dst", (JMP32m i32mem:$dst), 0>, Requires<[In32BitMode]>; -def : InstAlias<"call {*}$dst", (CALL16m i16mem:$dst), 0>, Requires<[In16BitMode]>; -def : InstAlias<"jmp {*}$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16BitMode]>; +def : InstAlias<"lcall\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>; +def : InstAlias<"ljmp\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>; +def : InstAlias<"lcall\t{*}$dst", (FARCALL32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>; +def : InstAlias<"ljmp\t{*}$dst", (FARJMP32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>; +def : InstAlias<"lcall\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>; +def : InstAlias<"ljmp\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>; +def : InstAlias<"lcall\t{*}$dst", (FARCALL16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>; +def : InstAlias<"ljmp\t{*}$dst", (FARJMP16m opaque32mem:$dst), 0>, 
Requires<[In16BitMode]>; + +def : InstAlias<"call\t{*}$dst", (CALL64m i64mem:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"jmp\t{*}$dst", (JMP64m i64mem:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"call\t{*}$dst", (CALL32m i32mem:$dst), 0>, Requires<[In32BitMode]>; +def : InstAlias<"jmp\t{*}$dst", (JMP32m i32mem:$dst), 0>, Requires<[In32BitMode]>; +def : InstAlias<"call\t{*}$dst", (CALL16m i16mem:$dst), 0>, Requires<[In16BitMode]>; +def : InstAlias<"jmp\t{*}$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16BitMode]>; // "imul <imm>, B" is an alias for "imul <imm>, B, B". -def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>; -def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>; -def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>; -def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>; -def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>; -def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>; +def : InstAlias<"imul{w}\t{$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>; +def : InstAlias<"imul{w}\t{$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>; +def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>; +def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>; +def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>; +def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>; // inb %dx -> inb %al, %dx def : InstAlias<"inb\t{%dx|dx}", (IN8rr), 0>; @@ -2927,46 +2927,46 @@ def : InstAlias<"inl\t$port", (IN32ri u8imm:$port), 0>; // jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp -def : InstAlias<"call $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>; -def : InstAlias<"jmp $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>; -def : InstAlias<"call $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>; -def : InstAlias<"jmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>; -def : InstAlias<"callw $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>; -def : InstAlias<"jmpw $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>; -def : InstAlias<"calll $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>; -def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>; +def : InstAlias<"call\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>; +def : InstAlias<"jmp\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>; +def : InstAlias<"call\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>; +def : InstAlias<"jmp\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>; +def : InstAlias<"callw\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>; +def : InstAlias<"jmpw\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>; +def : InstAlias<"calll\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>; +def : InstAlias<"jmpl\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>; // Force mov without a suffix with a segment and mem to prefer the 'l' form of // the move. 
All segment/mem forms are equivalent, this has the shortest // encoding. -def : InstAlias<"mov {$mem, $seg|$seg, $mem}", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem), 0>; -def : InstAlias<"mov {$seg, $mem|$mem, $seg}", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg), 0>; +def : InstAlias<"mov\t{$mem, $seg|$seg, $mem}", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem), 0>; +def : InstAlias<"mov\t{$seg, $mem|$mem, $seg}", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg), 0>; // Match 'movq <largeimm>, <reg>' as an alias for movabsq. -def : InstAlias<"movq {$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>; +def : InstAlias<"movq\t{$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>; // Match 'movq GR64, MMX' as an alias for movd. -def : InstAlias<"movq {$src, $dst|$dst, $src}", +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>; -def : InstAlias<"movq {$src, $dst|$dst, $src}", +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>; // movsx aliases -def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>; -def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>; -def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>; -def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>; -def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>; -def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>; -def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>; +def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>; +def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>; +def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>; +def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>; +def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>; +def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>; +def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>; // movzx aliases -def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>; -def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>; -def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>; -def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>; -def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>; -def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>; +def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>; +def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>; +def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>; +def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>; +def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX64rr8 GR64:$dst, GR8:$src), 0>; +def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX64rr16 GR64:$dst, GR16:$src), 0>; // Note: No GR32->GR64 movzx form. 
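The trailing note deserves its one-line justification: on x86-64, any write to a 32-bit register implicitly zeroes bits 63:32 of the full register, so a plain 32-bit mov already performs the GR32->GR64 zero-extension and no movzx encoding exists for it. A minimal check in standard C++ (the "compiles to a single 32-bit mov" claim assumes an optimizing x86-64 compiler):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Dirty = ~0ULL;       // all 64 bits set
      uint32_t Low = 0x12345678u;
      Dirty = Low;                  // one movl; upper 32 bits implicitly zeroed
      std::printf("%#llx\n", (unsigned long long)Dirty); // prints 0x12345678
      return 0;
    }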
// outb %dx -> outb %al, %dx diff --git a/contrib/llvm/lib/Target/X86/X86InstrMPX.td b/contrib/llvm/lib/Target/X86/X86InstrMPX.td index 31608cd..71ab973 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrMPX.td +++ b/contrib/llvm/lib/Target/X86/X86InstrMPX.td @@ -15,10 +15,10 @@ multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> { def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins i32mem:$src), - OpcodeStr#" \t{$src, $dst|$dst, $src}", []>, + OpcodeStr#"\t{$src, $dst|$dst, $src}", []>, Requires<[HasMPX, Not64BitMode]>; def 64rm: RI<opc, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), - OpcodeStr#" \t{$src, $dst|$dst, $src}", []>, + OpcodeStr#"\t{$src, $dst|$dst, $src}", []>, Requires<[HasMPX, In64BitMode]>; } @@ -26,16 +26,16 @@ defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS; multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> { def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i32mem:$src2), - OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>, Requires<[HasMPX, Not64BitMode]>; def 64rm: RI<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i64mem:$src2), - OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>, Requires<[HasMPX, In64BitMode]>; def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2), - OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>, Requires<[HasMPX, Not64BitMode]>; def 64rr: RI<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2), - OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>, Requires<[HasMPX, In64BitMode]>; } defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS; @@ -43,28 +43,28 @@ defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD; defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD; def BNDMOVRMrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src), - "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX]>; def BNDMOVRM32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), - "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX, Not64BitMode]>; def BNDMOVRM64rm : RI<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src), - "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX, In64BitMode]>; def BNDMOVMRrr : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src), - "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX]>; def BNDMOVMR32mr : I<0x1B, MRMDestMem, (outs i64mem:$dst), (ins BNDR:$src), - "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX, Not64BitMode]>; def BNDMOVMR64mr : RI<0x1B, MRMDestMem, (outs i128mem:$dst), (ins BNDR:$src), - "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX, In64BitMode]>; def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src), - "bndstx \t{$src, $dst|$dst, $src}", []>, PS, + "bndstx\t{$src, $dst|$dst, $src}", []>, PS, Requires<[HasMPX]>; def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), - "bndldx \t{$src, $dst|$dst, $src}", []>, PS, + "bndldx\t{$src, $dst|$dst, $src}", []>, PS, Requires<[HasMPX]>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td index 624b931..6a7c456 100644 --- 
a/contrib/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td @@ -1808,7 +1808,7 @@ def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}", def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>; def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", - (CVTSD2SI64rm GR64:$dst, sdmem:$src)>; + (CVTSD2SI64rm GR64:$dst, sdmem:$src), 0>; /// SSE 2 Only @@ -7838,9 +7838,7 @@ class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC, AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>, - Sched<[Sched]>, VEX { - let mayLoad = 1; -} + Sched<[Sched]>, VEX; // AVX2 adds register forms class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC, @@ -7871,7 +7869,7 @@ let ExeDomain = SSEPackedDouble in def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256, v4f64, v2f64, WriteFShuffle256>, VEX_L; -let mayLoad = 1, Predicates = [HasAVX2] in +let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), "vbroadcasti128\t{$src, $dst|$dst, $src}", []>, @@ -8259,6 +8257,9 @@ let Predicates = [HasF16C] in { (VCVTPH2PSrm addr:$src)>; def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)), (VCVTPH2PSrm addr:$src)>; + def : Pat<(int_x86_vcvtph2ps_128 (bitconvert + (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (VCVTPH2PSrm addr:$src)>; def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16 (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))), diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 646b556..b525d5e 100644 --- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -29,7 +29,7 @@ enum IntrinsicType { INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, - EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC, + EXPAND_FROM_MEM, LOADA, LOADU, BLEND, INSERT_SUBVEC, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK }; @@ -143,6 +143,18 @@ static const IntrinsicData IntrinsicsWithChain[] = { EXPAND_FROM_MEM, X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512, EXPAND_FROM_MEM, X86ISD::EXPAND, 0), + X86_INTRINSIC_DATA(avx512_mask_load_pd_128, LOADA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_load_pd_256, LOADA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_load_pd_512, LOADA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_load_ps_128, LOADA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_load_ps_256, LOADA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_load_ps_512, LOADA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_loadu_pd_128, LOADU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_loadu_pd_256, LOADU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_loadu_pd_512, LOADU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_loadu_ps_128, LOADU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_loadu_ps_256, LOADU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_loadu_ps_512, LOADU, ISD::DELETED_NODE, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8, X86ISD::VTRUNC, 0), 
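The LOADA/LOADU entries added above route the AVX-512 masked load intrinsics into the generic lowering shown earlier (a plain load when the mask is all ones, an ISD masked load otherwise). From the C/C++ side these correspond to intrinsics such as _mm512_mask_loadu_ps; a small usage sketch, assuming an AVX-512F target (e.g. compile with -mavx512f):

    #include <cstdio>
    #include <immintrin.h>

    int main() {
      alignas(64) float Src[16], Dst[16];
      for (int I = 0; I < 16; ++I) Src[I] = (float)I;
      __mmask16 K = 0x00ff;                          // low 8 lanes active
      __m512 Pass = _mm512_setzero_ps();             // passthru for masked-off lanes
      __m512 V = _mm512_mask_loadu_ps(Pass, K, Src); // unaligned masked load
      _mm512_store_ps(Dst, V);
      std::printf("%g %g\n", Dst[7], Dst[8]);        // 7 (loaded) and 0 (passthru)
      return 0;
    }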
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8, @@ -1129,6 +1141,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VTRUNCS, 0), X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK, X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_128, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_256, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_512, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_128, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_256, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_512, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_128, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_256, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_512, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_128, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_256, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_512, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_128, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_256, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_512, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_128, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_256, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_512, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK, X86ISD::VTRUNCUS, 0), X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK, @@ -1165,6 +1213,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VTRUNCUS, 0), X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK, X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_128, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_256, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_512, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_128, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_256, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_512, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_128, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_256, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_512, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_128, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_256, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_512, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_128, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_256, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_512, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + 
X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_128, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_256, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_512, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK, X86ISD::PMULDQ, 0), X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK, @@ -1201,12 +1285,54 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0), + X86_INTRINSIC_DATA(avx512_mask_prol_d_128, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0), + X86_INTRINSIC_DATA(avx512_mask_prol_d_256, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0), + X86_INTRINSIC_DATA(avx512_mask_prol_d_512, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0), + X86_INTRINSIC_DATA(avx512_mask_prol_q_128, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0), + X86_INTRINSIC_DATA(avx512_mask_prol_q_256, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0), + X86_INTRINSIC_DATA(avx512_mask_prol_q_512, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0), + X86_INTRINSIC_DATA(avx512_mask_prolv_d_128, INTR_TYPE_2OP_MASK, ISD::ROTL, 0), + X86_INTRINSIC_DATA(avx512_mask_prolv_d_256, INTR_TYPE_2OP_MASK, ISD::ROTL, 0), + X86_INTRINSIC_DATA(avx512_mask_prolv_d_512, INTR_TYPE_2OP_MASK, ISD::ROTL, 0), + X86_INTRINSIC_DATA(avx512_mask_prolv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTL, 0), + X86_INTRINSIC_DATA(avx512_mask_prolv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTL, 0), + X86_INTRINSIC_DATA(avx512_mask_prolv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTL, 0), + X86_INTRINSIC_DATA(avx512_mask_pror_d_128, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0), + X86_INTRINSIC_DATA(avx512_mask_pror_d_256, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0), + X86_INTRINSIC_DATA(avx512_mask_pror_d_512, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0), + X86_INTRINSIC_DATA(avx512_mask_pror_q_128, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0), + X86_INTRINSIC_DATA(avx512_mask_pror_q_256, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0), + X86_INTRINSIC_DATA(avx512_mask_pror_q_512, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0), + X86_INTRINSIC_DATA(avx512_mask_prorv_d_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), + X86_INTRINSIC_DATA(avx512_mask_prorv_d_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), + X86_INTRINSIC_DATA(avx512_mask_prorv_d_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), + X86_INTRINSIC_DATA(avx512_mask_prorv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), + X86_INTRINSIC_DATA(avx512_mask_prorv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), + X86_INTRINSIC_DATA(avx512_mask_prorv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), X86_INTRINSIC_DATA(avx512_mask_pshuf_b_128, INTR_TYPE_2OP_MASK, X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(avx512_mask_pshuf_b_256, INTR_TYPE_2OP_MASK, X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK, X86ISD::PSHUFB, 0), + X86_INTRINSIC_DATA(avx512_mask_pshuf_d_128, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFD, 0), + X86_INTRINSIC_DATA(avx512_mask_pshuf_d_256, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFD, 0), + X86_INTRINSIC_DATA(avx512_mask_pshuf_d_512, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFD, 0), + X86_INTRINSIC_DATA(avx512_mask_pshufh_w_128, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFHW, 0), + X86_INTRINSIC_DATA(avx512_mask_pshufh_w_256, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFHW, 0), + X86_INTRINSIC_DATA(avx512_mask_pshufh_w_512, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFHW, 0), + X86_INTRINSIC_DATA(avx512_mask_pshufl_w_128, 
INTR_TYPE_2OP_MASK, + X86ISD::PSHUFLW, 0), + X86_INTRINSIC_DATA(avx512_mask_pshufl_w_256, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFLW, 0), + X86_INTRINSIC_DATA(avx512_mask_pshufl_w_512, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFLW, 0), X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_psll_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_psll_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), @@ -1219,8 +1345,21 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psll_qi_128, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_psll_qi_256, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_psll_qi_512, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_wi_128, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_wi_256, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_pslli_q, VSHIFT_MASK, X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psllv16_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psllv2_di, INTR_TYPE_2OP_MASK, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psllv32hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psllv4_di, INTR_TYPE_2OP_MASK, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psllv4_si, INTR_TYPE_2OP_MASK, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psllv8_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psllv8_si, INTR_TYPE_2OP_MASK, ISD::SHL, 0), X86_INTRINSIC_DATA(avx512_mask_psllv_d, INTR_TYPE_2OP_MASK, ISD::SHL, 0), X86_INTRINSIC_DATA(avx512_mask_psllv_q, INTR_TYPE_2OP_MASK, ISD::SHL, 0), X86_INTRINSIC_DATA(avx512_mask_psra_d, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), @@ -1243,8 +1382,15 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psra_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0), X86_INTRINSIC_DATA(avx512_mask_psrai_d, VSHIFT_MASK, X86ISD::VSRAI, 0), X86_INTRINSIC_DATA(avx512_mask_psrai_q, VSHIFT_MASK, X86ISD::VSRAI, 0), + X86_INTRINSIC_DATA(avx512_mask_psrav16_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psrav32_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psrav4_si, INTR_TYPE_2OP_MASK, ISD::SRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psrav8_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psrav8_si, INTR_TYPE_2OP_MASK, ISD::SRA, 0), X86_INTRINSIC_DATA(avx512_mask_psrav_d, INTR_TYPE_2OP_MASK, ISD::SRA, 0), X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, ISD::SRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psrav_q_128, INTR_TYPE_2OP_MASK, ISD::SRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psrav_q_256, INTR_TYPE_2OP_MASK, ISD::SRA, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h 
b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h index 3a7a98d..00515dd 100644 --- a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -92,6 +92,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// used to address arguments in a function using a base pointer. int SEHFramePtrSaveIndex = 0; + /// True if this function has a subset of CSRs that is handled explicitly via + /// copies. + bool IsSplitCSR = false; + private: /// ForwardedMustTailRegParms - A list of virtual and physical registers /// that must be forwarded to every musttail call. @@ -160,6 +164,9 @@ public: SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() { return ForwardedMustTailRegParms; } + + bool isSplitCSR() const { return IsSplitCSR; } + void setIsSplitCSR(bool s) { IsSplitCSR = s; } }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp index 58020d9..45cc0ae 100644 --- a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp @@ -9,8 +9,10 @@ // // This file defines the pass that performs some optimizations with LEA // instructions in order to improve code size. -// Currently, it does one thing: -// 1) Address calculations in load and store instructions are replaced by +// Currently, it does two things: +// 1) If there are two LEA instructions calculating addresses which only differ +// by displacement inside a basic block, one of them is removed. +// 2) Address calculations in load and store instructions are replaced by // existing LEA def registers where possible. // //===----------------------------------------------------------------------===// @@ -38,6 +40,7 @@ static cl::opt<bool> EnableX86LEAOpt("enable-x86-lea-opt", cl::Hidden, cl::init(false)); STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions"); +STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed"); namespace { class OptimizeLEAPass : public MachineFunctionPass { @@ -71,6 +74,13 @@ private: /// \brief Returns true if the instruction is LEA. bool isLEA(const MachineInstr &MI); + /// \brief Returns true if the \p Last LEA instruction can be replaced by the + /// \p First. The difference between displacements of the addresses calculated + /// by these LEAs is returned in \p AddrDispShift. It'll be used for proper + /// replacement of the \p Last LEA's uses with the \p First's def register. + bool isReplaceable(const MachineInstr &First, const MachineInstr &Last, + int64_t &AddrDispShift); + /// \brief Returns true if two instructions have memory operands that only /// differ by displacement. The numbers of the first memory operands for both /// instructions are specified through \p N1 and \p N2. The address @@ -79,13 +89,20 @@ private: const MachineInstr &MI2, unsigned N2, int64_t &AddrDispShift); - /// \brief Find all LEA instructions in the basic block. + /// \brief Find all LEA instructions in the basic block. Also, assign position + /// numbers to all instructions in the basic block to speed up calculation of + /// distance between them. void findLEAs(const MachineBasicBlock &MBB, SmallVectorImpl<MachineInstr *> &List); /// \brief Removes redundant address calculations. bool removeRedundantAddrCalc(const SmallVectorImpl<MachineInstr *> &List); + /// \brief Removes LEAs which calculate similar addresses. 
+ bool removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List); + + DenseMap<const MachineInstr *, unsigned> InstrPos; + MachineRegisterInfo *MRI; const X86InstrInfo *TII; const X86RegisterInfo *TRI; @@ -99,14 +116,15 @@ FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); } int OptimizeLEAPass::calcInstrDist(const MachineInstr &First, const MachineInstr &Last) { - const MachineBasicBlock *MBB = First.getParent(); - - // Both instructions must be in the same basic block. - assert(Last.getParent() == MBB && + // Both instructions must be in the same basic block and they must be + // present in InstrPos. + assert(Last.getParent() == First.getParent() && "Instructions are in different basic blocks"); + assert(InstrPos.find(&First) != InstrPos.end() && + InstrPos.find(&Last) != InstrPos.end() && + "Instructions' positions are undefined"); - return std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&Last)) - - std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&First)); + return InstrPos[&Last] - InstrPos[&First]; } // Find the best LEA instruction in the List to replace address recalculation in @@ -189,6 +207,69 @@ bool OptimizeLEAPass::isLEA(const MachineInstr &MI) { Opcode == X86::LEA64r || Opcode == X86::LEA64_32r; } +// Check that the Last LEA can be replaced by the First LEA. For this to hold, +// these requirements must be met: +// 1) Addresses calculated by LEAs differ only by displacement. +// 2) Def registers of LEAs belong to the same class. +// 3) All uses of the Last LEA def register are replaceable, thus the +// register is used only as address base. +bool OptimizeLEAPass::isReplaceable(const MachineInstr &First, + const MachineInstr &Last, + int64_t &AddrDispShift) { + assert(isLEA(First) && isLEA(Last) && + "The function works only with LEA instructions"); + + // Compare instructions' memory operands. + if (!isSimilarMemOp(Last, 1, First, 1, AddrDispShift)) + return false; + + // Make sure that LEA def registers belong to the same class. There may be + // instructions (like MOV8mr_NOREX) which allow a limited set of registers to + // be used as their operands, so we must be sure that replacing one LEA + // with another won't lead to putting a wrong register in the instruction. + if (MRI->getRegClass(First.getOperand(0).getReg()) != + MRI->getRegClass(Last.getOperand(0).getReg())) + return false; + + // Loop over all uses of the Last LEA to check that its def register is + // used only as address base for memory accesses. If so, it can be + // replaced; otherwise it cannot. + for (auto &MO : MRI->use_operands(Last.getOperand(0).getReg())) { + MachineInstr &MI = *MO.getParent(); + + // Get the number of the first memory operand. + const MCInstrDesc &Desc = MI.getDesc(); + int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()); + + // If the use instruction has no memory operand, the LEA is not + // replaceable. + if (MemOpNo < 0) + return false; + + MemOpNo += X86II::getOperandBias(Desc); + + // If the address base of the use instruction is not the LEA def register, + // the LEA is not replaceable. + if (!isIdenticalOp(MI.getOperand(MemOpNo + X86::AddrBaseReg), MO)) + return false; + + // If the LEA def register is used as any other operand of the use + // instruction, the LEA is not replaceable. + for (unsigned i = 0; i < MI.getNumOperands(); i++) + if (i != (unsigned)(MemOpNo + X86::AddrBaseReg) && + isIdenticalOp(MI.getOperand(i), MO)) + return false; + + // Check that the new address displacement will fit in 4 bytes.
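The check that follows is the subtle precondition: x86 memory operands encode the displacement as a signed 32-bit immediate, so each use's displacement must still be encodable after AddrDispShift is folded in. A minimal sketch of that test using llvm::isInt from Support/MathExtras.h, the same helper the code below relies on (the values in the trailing comment are illustrative):

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // True if a use's displacement remains encodable as a signed 32-bit
    // immediate after applying the shift between the two LEAs.
    static bool dispStillFits(int64_t Disp, int64_t AddrDispShift) {
      return llvm::isInt<32>(Disp + AddrDispShift);
    }

    // dispStillFits(0x7fffffff, 0) -> true
    // dispStillFits(0x7fffffff, 1) -> false (no longer fits in 4 bytes)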
+ if (MI.getOperand(MemOpNo + X86::AddrDisp).isImm() && + !isInt<32>(MI.getOperand(MemOpNo + X86::AddrDisp).getImm() + + AddrDispShift)) + return false; + } + + return true; +} + // Check if MI1 and MI2 have memory operands which represent addresses that // differ only by displacement. bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1, @@ -219,7 +300,15 @@ bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1, void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, SmallVectorImpl<MachineInstr *> &List) { + unsigned Pos = 0; for (auto &MI : MBB) { + // Assign the position number to the instruction. Note that we are going to + // move some instructions during the optimization; however, there will never + // be a need to move two instructions before any selected instruction. So, to + // avoid multiple position updates during moves, we just increase the position + // counter by two, leaving free space for instructions which will be moved. + InstrPos[&MI] = Pos += 2; + if (isLEA(MI)) List.push_back(const_cast<MachineInstr *>(&MI)); } @@ -270,6 +359,13 @@ bool OptimizeLEAPass::removeRedundantAddrCalc( if (Dist < 0) { DefMI->removeFromParent(); MBB->insert(MachineBasicBlock::iterator(&MI), DefMI); + InstrPos[DefMI] = InstrPos[&MI] - 1; + + // Make sure the instructions' position numbers are sane. + assert(((InstrPos[DefMI] == 1 && DefMI == MBB->begin()) || + InstrPos[DefMI] > + InstrPos[std::prev(MachineBasicBlock::iterator(DefMI))]) && + "Instruction positioning is broken"); } // Since we can possibly extend register lifetime, clear kill flags. @@ -296,6 +392,81 @@ bool OptimizeLEAPass::removeRedundantAddrCalc( return Changed; } +// Try to find similar LEAs in the list and replace one with another. +bool +OptimizeLEAPass::removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List) { + bool Changed = false; + + // Loop over all LEA pairs. + auto I1 = List.begin(); + while (I1 != List.end()) { + MachineInstr &First = **I1; + auto I2 = std::next(I1); + while (I2 != List.end()) { + MachineInstr &Last = **I2; + int64_t AddrDispShift; + + // LEAs should be in occurrence order in the list, so we can freely + // replace later LEAs with earlier ones. + assert(calcInstrDist(First, Last) > 0 && + "LEAs must be in occurrence order in the list"); + + // Check that the Last LEA instruction can be replaced by the First. + if (!isReplaceable(First, Last, AddrDispShift)) { + ++I2; + continue; + } + + // Loop over all uses of the Last LEA and update their operands. Note that + // the correctness of this has already been checked in the isReplaceable + // function. + for (auto UI = MRI->use_begin(Last.getOperand(0).getReg()), + UE = MRI->use_end(); + UI != UE;) { + MachineOperand &MO = *UI++; + MachineInstr &MI = *MO.getParent(); + + // Get the number of the first memory operand. + const MCInstrDesc &Desc = MI.getDesc(); + int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) + + X86II::getOperandBias(Desc); + + // Update address base. + MO.setReg(First.getOperand(0).getReg()); + + // Update address disp. + MachineOperand *Op = &MI.getOperand(MemOpNo + X86::AddrDisp); + if (Op->isImm()) + Op->setImm(Op->getImm() + AddrDispShift); + else if (Op->isGlobal()) + Op->setOffset(Op->getOffset() + AddrDispShift); + else + llvm_unreachable("Invalid address displacement operand"); + } + + // Since we can possibly extend register lifetime, clear kill flags.
+ MRI->clearKillFlags(First.getOperand(0).getReg()); + + ++NumRedundantLEAs; + DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: "; Last.dump();); + + // By this moment, all of the Last LEA's uses must be replaced. So we can + // freely remove it. + assert(MRI->use_empty(Last.getOperand(0).getReg()) && + "The LEA's def register must have no uses"); + Last.eraseFromParent(); + + // Erase removed LEA from the list. + I2 = List.erase(I2); + + Changed = true; + } + ++I1; + } + + return Changed; +} + bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; @@ -310,6 +481,7 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { // Process all basic blocks. for (auto &MBB : MF) { SmallVector<MachineInstr *, 16> LEAs; + InstrPos.clear(); // Find all LEA instructions in basic block. findLEAs(MBB, LEAs); @@ -318,6 +490,11 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { if (LEAs.empty()) continue; + // Remove redundant LEA instructions. The optimization may have a negative + // effect on performance, so do it only for -Oz. + if (MF.getFunction()->optForMinSize()) + Changed |= removeRedundantLEAs(LEAs); + // Remove redundant address calculations. Changed |= removeRedundantAddrCalc(LEAs); } diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp index 5840443..274b566 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -250,7 +250,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_64_RT_AllRegs_SaveList; case CallingConv::CXX_FAST_TLS: if (Is64Bit) - return CSR_64_TLS_Darwin_SaveList; + return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ? + CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList; break; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) @@ -305,6 +306,15 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_32_SaveList; } +const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy( + const MachineFunction *MF) const { + assert(MF && "Invalid MachineFunction pointer."); + if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR()) + return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList; + return nullptr; +} + const uint32_t * X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h index f014c8f..8d0094c 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h @@ -99,6 +99,8 @@ public: /// callee-save registers on this target. const MCPhysReg * getCalleeSavedRegs(const MachineFunction* MF) const override; + const MCPhysReg * + getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const override; diff --git a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp index 816291d..6df0447 100644 --- a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp @@ -22,7 +22,7 @@ static cl::list<std::string> ForceAttributes("force-attribute", cl::Hidden, cl::desc("Add an attribute to a function. 
This should be a " "pair of 'function-name:attribute-name', for " - "example -force-add-attribute=foo:noinline. This " + "example -force-attribute=foo:noinline. This " "option can be specified multiple times.")); static Attribute::AttrKind parseAttrKind(StringRef Kind) { diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 6dcfb3f..527fdd1 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -6,16 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file implements a simple interprocedural pass which walks the -// call-graph, looking for functions which do not access or only read -// non-local memory, and marking them readnone/readonly. It does the -// same with function arguments independently, marking them readonly/ -// readnone/nocapture. Finally, well-known library call declarations -// are marked with all attributes that are consistent with the -// function's standard definition. This pass is implemented as a -// bottom-up traversal of the call-graph. -// +/// +/// \file +/// This file implements interprocedural passes which walk the +/// call-graph deducing and/or propagating function attributes. +/// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO.h" @@ -57,19 +52,14 @@ typedef SmallSetVector<Function *, 8> SCCNodeSet; } namespace { -struct FunctionAttrs : public CallGraphSCCPass { +struct PostOrderFunctionAttrs : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - FunctionAttrs() : CallGraphSCCPass(ID) { - initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); + PostOrderFunctionAttrs() : CallGraphSCCPass(ID) { + initializePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry()); } bool runOnSCC(CallGraphSCC &SCC) override; - bool doInitialization(CallGraph &CG) override { - Revisit.clear(); - return false; - } - bool doFinalization(CallGraph &CG) override; - + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<AssumptionCacheTracker>(); @@ -79,20 +69,19 @@ struct FunctionAttrs : public CallGraphSCCPass { private: TargetLibraryInfo *TLI; - SmallVector<WeakVH,16> Revisit; }; } -char FunctionAttrs::ID = 0; -INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", +char PostOrderFunctionAttrs::ID = 0; +INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrs, "functionattrs", "Deduce function attributes", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", +INITIALIZE_PASS_END(PostOrderFunctionAttrs, "functionattrs", "Deduce function attributes", false, false) -Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } +Pass *llvm::createPostOrderFunctionAttrsPass() { return new PostOrderFunctionAttrs(); } namespace { /// The three kinds of memory access relevant to 'readonly' and @@ -949,8 +938,7 @@ static bool setDoesNotRecurse(Function &F) { return true; } -static bool addNoRecurseAttrs(const CallGraphSCC &SCC, - SmallVectorImpl<WeakVH> &Revisit) { +static bool addNoRecurseAttrs(const CallGraphSCC &SCC) { // Try and identify functions that do not recurse. // If the SCC contains multiple nodes we know for sure there is recursion. 
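Before the next hunk, it helps to restate the post-order rule the code above implements: an SCC with more than one function is recursive by construction, and a single-function SCC can be marked norecurse only if everything it calls is already known norecurse. A minimal sketch of that rule as a standalone predicate, not the pass itself (the flat function list is an assumption for illustration):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstIterator.h"

    // Post-order norecurse deduction reduced to a predicate over one SCC.
    static bool sccCanBeNoRecurse(llvm::ArrayRef<llvm::Function *> SCCFns) {
      if (SCCFns.size() != 1)
        return false; // A multi-node SCC is mutually recursive by definition.
      llvm::Function *F = SCCFns.front();
      for (llvm::Instruction &I : llvm::instructions(*F))
        if (auto CS = llvm::CallSite(&I)) {
          llvm::Function *Callee = CS.getCalledFunction();
          // Indirect calls, self-calls, and callees not yet proven norecurse
          // all block the deduction.
          if (!Callee || Callee == F || !Callee->doesNotRecurse())
            return false;
        }
      return true;
    }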
@@ -973,32 +961,11 @@ static bool addNoRecurseAttrs(const CallGraphSCC &SCC, // Function calls a potentially recursive function. return setDoesNotRecurse(*F); - // We know that F is not obviously recursive, but we haven't been able to - // prove that it doesn't actually recurse. Add it to the Revisit list to try - // again top-down later. - Revisit.push_back(F); + // Nothing else we can deduce usefully during the postorder traversal. return false; } -static bool addNoRecurseAttrsTopDownOnly(Function *F) { - // If F is internal and all uses are in norecurse functions, then F is also - // norecurse. - if (F->doesNotRecurse()) - return false; - if (F->hasInternalLinkage()) { - for (auto *U : F->users()) - if (auto *I = dyn_cast<Instruction>(U)) { - if (!I->getParent()->getParent()->doesNotRecurse()) - return false; - } else { - return false; - } - return setDoesNotRecurse(*F); - } - return false; -} - -bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { +bool PostOrderFunctionAttrs::runOnSCC(CallGraphSCC &SCC) { TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); bool Changed = false; @@ -1040,19 +1007,100 @@ bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { Changed |= addNoAliasAttrs(SCCNodes); Changed |= addNonNullAttrs(SCCNodes, *TLI); } - - Changed |= addNoRecurseAttrs(SCC, Revisit); + + Changed |= addNoRecurseAttrs(SCC); return Changed; } -bool FunctionAttrs::doFinalization(CallGraph &CG) { +namespace { +/// A pass to do RPO deduction and propagation of function attributes. +/// +/// This pass provides a general RPO or "top down" propagation of +/// function attributes. For a few (rare) cases, we can deduce significantly +/// more about function attributes by working in RPO, so this pass +/// provides the complement to the post-order pass above where the majority of +/// deduction is performed. +// FIXME: Currently there is no RPO CGSCC pass structure to slide into and so +// this is a boring module pass, but eventually it should be an RPO CGSCC pass +// when such infrastructure is available. +struct ReversePostOrderFunctionAttrs : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ReversePostOrderFunctionAttrs() : ModulePass(ID) { + initializeReversePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<CallGraphWrapperPass>(); + } +}; +} + +char ReversePostOrderFunctionAttrs::ID = 0; +INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrs, "rpo-functionattrs", + "Deduce function attributes in RPO", false, false) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_END(ReversePostOrderFunctionAttrs, "rpo-functionattrs", + "Deduce function attributes in RPO", false, false) + +Pass *llvm::createReversePostOrderFunctionAttrsPass() { + return new ReversePostOrderFunctionAttrs(); +} + +static bool addNoRecurseAttrsTopDown(Function &F) { + // We check the preconditions for the function prior to calling this to avoid + // the cost of building up a reversible post-order list. We assert them here + // to make sure none of the invariants this relies on were violated.
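The asserts that follow pin down those preconditions; in source terms the top-down rule looks like this hypothetical pair (both functions and the scenario are invented for illustration):

    // 'helper' has internal linkage and its only use is a direct call from
    // 'driver'. Once 'driver' is known to be norecurse, no call path can
    // re-enter 'helper', so the RPO walk may mark 'helper' norecurse too.
    namespace {
    int helper(int X) { return X + 1; }      // internal linkage
    }
    int driver(int X) { return helper(X); }  // assume already norecurse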
+ assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!"); + assert(!F.doesNotRecurse() && + "This function has already been deduced as norecurse!"); + assert(F.hasInternalLinkage() && + "Can only do top-down deduction for internal linkage functions!"); + + // If F is internal and all of its uses are calls from non-recursive + // functions, then none of its calls could in fact recurse without going + // through a function marked norecurse, and so we can mark this function too + // as norecurse. Note that the uses must actually be calls -- otherwise + // a pointer to this function could be returned from a norecurse function but + // this function could be recursively (indirectly) called. Note that this + // also detects if F is directly recursive as F is not yet marked as + // a norecurse function. + for (auto *U : F.users()) { + auto *I = dyn_cast<Instruction>(U); + if (!I) + return false; + CallSite CS(I); + if (!CS || !CS.getParent()->getParent()->doesNotRecurse()) + return false; + } + return setDoesNotRecurse(F); +} + +bool ReversePostOrderFunctionAttrs::runOnModule(Module &M) { + // We only have a post-order SCC traversal (because SCCs are inherently + // discovered in post-order), so we accumulate them in a vector and then walk + // it in reverse. This is simpler than using the RPO iterator infrastructure + // because we need to combine SCC detection and the PO walk of the call + // graph. We can also cheat egregiously because we're primarily interested in + // synthesizing norecurse and so we can only save the singular SCCs, as SCCs + // with multiple functions in them will clearly be recursive. + auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + SmallVector<Function *, 16> Worklist; + for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { + if (I->size() != 1) + continue; + + Function *F = I->front()->getFunction(); + if (F && !F->isDeclaration() && !F->doesNotRecurse() && + F->hasInternalLinkage()) + Worklist.push_back(F); + } + bool Changed = false; - // When iterating over SCCs we visit functions in a bottom-up fashion. Some of - // the rules we have for identifying norecurse functions work best with a - // top-down walk, so look again at all the functions we previously marked as - // worth revisiting, in top-down order. - for (auto &F : reverse(Revisit)) - if (F) - Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>((Value*)F)); + for (auto *F : reverse(Worklist)) + Changed |= addNoRecurseAttrsTopDown(*F); + return Changed; } diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp index d8b677b..5e0df95 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -41,15 +41,16 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName, LLVMContext &Context) { SMDiagnostic Err; DEBUG(dbgs() << "Loading '" << FileName << "'\n"); - std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context); + // Metadata isn't loaded or linked until after all functions are + // imported, after which it will be materialized and linked.
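A minimal standalone sketch of the lazy-loading sequence this enables (Path is a placeholder, error handling elided as in the pass): parse with metadata deferred, import the needed bodies, then materialize and upgrade the metadata once per source module:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/AutoUpgrade.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IRReader/IRReader.h"
    #include "llvm/Support/SourceMgr.h"
    #include <memory>

    std::unique_ptr<llvm::Module> loadForImport(llvm::StringRef Path,
                                                llvm::LLVMContext &Ctx) {
      llvm::SMDiagnostic Err;
      // Defer metadata: function bodies can be imported without it.
      auto M = llvm::getLazyIRFileModule(Path, Err, Ctx,
                                         /* ShouldLazyLoadMetadata = */ true);
      if (!M)
        return nullptr;
      // ... import the required function bodies here ...
      M->materializeMetadata();    // load the deferred metadata
      llvm::UpgradeDebugInfo(*M);  // upgrade it before linking
      return M;
    }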
+ std::unique_ptr<Module> Result = + getLazyIRFileModule(FileName, Err, Context, + /* ShouldLazyLoadMetadata = */ true); if (!Result) { Err.print("function-import", errs()); return nullptr; } - Result->materializeMetadata(); - UpgradeDebugInfo(*Result); - return Result; } @@ -132,6 +133,8 @@ static void findExternalCalls(const Module &DestModule, Function &F, // Ignore functions already present in the destination module auto *SrcGV = DestModule.getNamedValue(ImportedName); if (SrcGV) { + if (GlobalAlias *SGA = dyn_cast<GlobalAlias>(SrcGV)) + SrcGV = SGA->getBaseObject(); assert(isa<Function>(SrcGV) && "Name collision during import"); if (!cast<Function>(SrcGV)->isDeclaration()) { DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring " @@ -324,6 +327,10 @@ bool FunctionImporter::importFunctions(Module &DestModule) { ModuleToTempMDValsMap) { // Load the specified source module. auto &SrcModule = ModuleLoaderCache(SME.getKey()); + // The modules were created with lazy metadata loading. Materialize it + // now, before linking it. + SrcModule.materializeMetadata(); + UpgradeDebugInfo(SrcModule); // Link in all necessary metadata from this module. if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get())) @@ -408,14 +415,19 @@ public: Index = IndexPtr.get(); } + // First we need to promote to global scope and rename any local values that + // are potentially exported to other modules. + if (renameModuleForThinLTO(M, Index)) { + errs() << "Error renaming module\n"; + return false; + } + // Perform the import now. auto ModuleLoader = [&M](StringRef Identifier) { return loadFile(Identifier, M.getContext()); }; FunctionImporter Importer(*Index, ModuleLoader); return Importer.importFunctions(M); - - return false; } }; } // anonymous namespace diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp index 7ea6c08..89629cf0 100644 --- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp +++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp @@ -28,7 +28,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeDAEPass(Registry); initializeDAHPass(Registry); initializeForceFunctionAttrsLegacyPassPass(Registry); - initializeFunctionAttrsPass(Registry); initializeGlobalDCEPass(Registry); initializeGlobalOptPass(Registry); initializeIPCPPass(Registry); @@ -42,6 +41,8 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeLowerBitSetsPass(Registry); initializeMergeFunctionsPass(Registry); initializePartialInlinerPass(Registry); + initializePostOrderFunctionAttrsPass(Registry); + initializeReversePostOrderFunctionAttrsPass(Registry); initializePruneEHPass(Registry); initializeStripDeadPrototypesLegacyPassPass(Registry); initializeStripSymbolsPass(Registry); @@ -71,7 +72,7 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) { } void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createFunctionAttrsPass()); + unwrap(PM)->add(createPostOrderFunctionAttrsPass()); } void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) { diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp index 8e4ad64..3c6a7bb 100644 --- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp +++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp @@ -38,7 +38,7 @@ namespace { static char ID; // Pass identification, replacement for typeid unsigned NumLoops; - explicit LoopExtractor(unsigned numLoops = ~0) + explicit LoopExtractor(unsigned numLoops = ~0) : LoopPass(ID), NumLoops(numLoops) { 
initializeLoopExtractorPass(*PassRegistry::getPassRegistry()); } @@ -143,7 +143,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more loop passes on it. - LI.updateUnloop(L); + LI.markAsRemoved(L); } ++NumExtracted; } diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 9876efa..faada9c 100644 --- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -251,7 +251,7 @@ void PassManagerBuilder::populateModulePassManager( Inliner = nullptr; } if (!DisableUnitAtATime) - MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs + MPM.add(createPostOrderFunctionAttrsPass()); if (OptLevel > 2) MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args @@ -346,6 +346,9 @@ void PassManagerBuilder::populateModulePassManager( // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + if (!DisableUnitAtATime) + MPM.add(createReversePostOrderFunctionAttrsPass()); + if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) { // Remove avail extern fns and globals definitions if we aren't // compiling an object file for later LTO. For LTO we want to preserve @@ -502,7 +505,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createIPSCCPPass()); // Now that we internalized some globals, see if we can hack on them! - PM.add(createFunctionAttrsPass()); // Add norecurse if possible. + PM.add(createPostOrderFunctionAttrsPass()); + PM.add(createReversePostOrderFunctionAttrsPass()); PM.add(createGlobalOptimizerPass()); // Promote any localized global vars. PM.add(createPromoteMemoryToRegisterPass()); @@ -551,7 +555,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createScalarReplAggregatesPass()); // Run a few AA driven optimizations here and now, to cleanup the code. - PM.add(createFunctionAttrsPass()); // Add nocapture. + PM.add(createPostOrderFunctionAttrsPass()); // Add nocapture. PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. PM.add(createLICMPass()); // Hoist loop invariants. 
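The PassManagerBuilder changes above encode a two-phase design: deduce attributes bottom-up over the call graph first, then propagate top-down once the CGSCC walk is complete. A minimal sketch of the relative ordering with the legacy pass manager (a bare pipeline, not the builder itself):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/IPO.h"

    void addFunctionAttrPhases(llvm::legacy::PassManager &PM) {
      // Phase 1: post-order over SCCs; callees are visited before callers,
      // so readnone/readonly/nocapture facts flow upward.
      PM.add(llvm::createPostOrderFunctionAttrsPass());
      // Phase 2: reverse post-order; caller facts (norecurse, currently)
      // flow downward to internal functions.
      PM.add(llvm::createReversePostOrderFunctionAttrsPass());
    }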
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 7ad0efc..160792b 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -636,7 +636,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { // if pattern detected emit alternate sequence if (OpX && OpY) { BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->SetFastMathFlags(Log2->getFastMathFlags()); + Builder->setFastMathFlags(Log2->getFastMathFlags()); Log2->setArgOperand(0, OpY); Value *FMulVal = Builder->CreateFMul(OpX, Log2); Value *FSub = Builder->CreateFSub(FMulVal, OpX); @@ -652,7 +652,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool IgnoreZeroSign = I.hasNoSignedZeros(); if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) { BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->SetFastMathFlags(I.getFastMathFlags()); + Builder->setFastMathFlags(I.getFastMathFlags()); Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign); Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign); @@ -693,7 +693,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (Y) { BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->SetFastMathFlags(I.getFastMathFlags()); + Builder->setFastMathFlags(I.getFastMathFlags()); Value *T = Builder->CreateFMul(Opnd1, Opnd1); Value *R = Builder->CreateFMul(T, Y); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 776704d..51219bc 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -930,7 +930,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); IRBuilder<>::FastMathFlagGuard FMFG(*Builder); - Builder->SetFastMathFlags(FCI->getFastMathFlags()); + Builder->setFastMathFlags(FCI->getFastMathFlags()); Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal, FCI->getName() + ".inv"); @@ -973,7 +973,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); IRBuilder<>::FastMathFlagGuard FMFG(*Builder); - Builder->SetFastMathFlags(FCI->getFastMathFlags()); + Builder->setFastMathFlags(FCI->getFastMathFlags()); Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal, FCI->getName() + ".inv"); @@ -1082,7 +1082,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } else { IRBuilder<>::FastMathFlagGuard FMFG(*Builder); auto FMF = cast<FPMathOperator>(SI.getCondition())->getFastMathFlags(); - Builder->SetFastMathFlags(FMF); + Builder->setFastMathFlags(FMF); Cmp = Builder->CreateFCmp(Pred, LHS, RHS); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 54a9fbd..5cde31a 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -384,23 +384,20 @@ static void replaceExtractElements(InsertElementInst *InsElt, ConstantVector::get(ExtendMask)); // Insert the new shuffle after the vector operand of the extract is defined - // or at the 
start of the basic block, so any subsequent extracts can use it. - bool ReplaceAllExtUsers; - if (auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp)) { + // (as long as it's not a PHI) or at the start of the basic block of the + // extract, so any subsequent extracts in the same basic block can use it. + // TODO: Insert before the earliest ExtractElementInst that is replaced. + auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp); + if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) WideVec->insertAfter(ExtVecOpInst); - ReplaceAllExtUsers = true; - } else { - // TODO: Insert at start of function, so it's always safe to replace all? + else IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt()); - ReplaceAllExtUsers = false; - } // Replace extracts from the original narrow vector with extracts from the new // wide vector. for (User *U : ExtVecOp->users()) { ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U); - if (!OldExt || - (!ReplaceAllExtUsers && OldExt->getParent() != WideVec->getParent())) + if (!OldExt || OldExt->getParent() != WideVec->getParent()) continue; auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); NewExt->insertAfter(WideVec); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 51ff95d..28483e7 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -93,8 +93,8 @@ private: /// Replace instrprof_increment with an increment of the appropriate value. void lowerIncrement(InstrProfIncrementInst *Inc); - /// Set up the section and uses for coverage data and its references. - void lowerCoverageData(GlobalVariable *CoverageData); + /// Force emitting of name vars for unused functions. + void lowerCoverageData(GlobalVariable *CoverageNamesVar); /// Get the region counters for an increment, creating them if necessary. /// @@ -156,9 +156,9 @@ bool InstrProfiling::runOnModule(Module &M) { } } - if (GlobalVariable *Coverage = - M.getNamedGlobal(getCoverageMappingVarName())) { - lowerCoverageData(Coverage); + if (GlobalVariable *CoverageNamesVar = + M.getNamedGlobal(getCoverageNamesVarName())) { + lowerCoverageData(CoverageNamesVar); MadeChange = true; } @@ -233,28 +233,16 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { Inc->eraseFromParent(); } -void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) { - - Constant *Init = CoverageData->getInitializer(); - // We're expecting { [4 x 32], [n x { i8*, i32, i32 }], [m x i8] } - // for some C. If not, the frontend's given us something broken. 
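The insertion-point rule in the replaceExtractElements change above is worth spelling out: a non-PHI instruction may not be placed among a block's leading PHI nodes, so when the vector is defined by a PHI the new shuffle must drop to the block's first insertion point instead. A hedged sketch of that choice (the helper name is invented):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    // Place NewI as early as legally possible relative to the definition of V.
    static void placeAfterDef(llvm::Instruction *NewI, llvm::Value *V,
                              llvm::BasicBlock *FallbackBB) {
      auto *DefI = llvm::dyn_cast<llvm::Instruction>(V);
      if (DefI && !llvm::isa<llvm::PHINode>(DefI))
        NewI->insertAfter(DefI); // safe: DefI is already past the PHI prefix
      else
        // Arguments, constants, and PHI-defined values: fall back to the
        // first insertion point, which skips PHIs (and any landingpad).
        NewI->insertBefore(&*FallbackBB->getFirstInsertionPt());
    }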
- assert(Init->getNumOperands() == 3 && "bad number of fields in coverage map"); - assert(isa<ConstantArray>(Init->getAggregateElement(1)) && - "invalid function list in coverage map"); - ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(1)); - for (unsigned I = 0, E = Records->getNumOperands(); I < E; ++I) { - Constant *Record = Records->getOperand(I); - Value *V = const_cast<Value *>(Record->getOperand(0))->stripPointerCasts(); +void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) { + ConstantArray *Names = + cast<ConstantArray>(CoverageNamesVar->getInitializer()); + for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) { + Constant *NC = Names->getOperand(I); + Value *V = NC->stripPointerCasts(); assert(isa<GlobalVariable>(V) && "Missing reference to function name"); GlobalVariable *Name = cast<GlobalVariable>(V); - // If we have region counters for this name, we've already handled it. - auto It = ProfileDataMap.find(Name); - if (It != ProfileDataMap.end()) - if (It->second.RegionCounters) - continue; - // Move the name variable to the right section. Name->setSection(getNameSection()); Name->setAlignment(1); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 5a7bce5..34aaa7f 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -692,7 +692,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { const DataLayout &DL = F.getParent()->getDataLayout(); unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment); unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType()); - if (isa<StructType>(Shadow->getType())) { + if (Shadow->getType()->isAggregateType()) { paintOrigin(IRB, updateOrigin(Origin, IRB), getOriginPtr(Addr, IRB, Alignment), StoreSize, OriginAlignment); diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 087ce8a..dcdcfed 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -100,9 +100,9 @@ namespace { std::unique_ptr<BranchProbabilityInfo> BPI; bool HasProfileData; #ifdef NDEBUG - SmallPtrSet<BasicBlock*, 16> LoopHeaders; + SmallPtrSet<const BasicBlock *, 16> LoopHeaders; #else - SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders; + SmallSet<AssertingVH<const BasicBlock>, 16> LoopHeaders; #endif DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet; @@ -163,6 +163,7 @@ namespace { bool SimplifyPartiallyRedundantLoad(LoadInst *LI); bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB); + bool TryToUnfoldSelectInCurrBB(BasicBlock *BB); private: BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, @@ -210,11 +211,12 @@ bool JumpThreading::runOnFunction(Function &F) { // we will loop forever. We take care of this issue by not jump threading for // back edges. This works for normal cases but not for unreachable blocks as // they may have a cycle with no back edge.
- removeUnreachableBlocks(F); + bool EverChanged = false; + EverChanged |= removeUnreachableBlocks(F, LVI); FindLoopHeaders(F); - bool Changed, EverChanged = false; + bool Changed; do { Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E;) { @@ -363,8 +365,8 @@ void JumpThreading::FindLoopHeaders(Function &F) { SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges; FindFunctionBackedges(F, Edges); - for (unsigned i = 0, e = Edges.size(); i != e; ++i) - LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second)); + for (const auto &Edge : Edges) + LoopHeaders.insert(Edge.second); } /// getKnownConstant - Helper method to determine if we can thread over a @@ -410,8 +412,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // If V is a constant, then it is known in all predecessors. if (Constant *KC = getKnownConstant(V, Preference)) { - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - Result.push_back(std::make_pair(KC, *PI)); + for (BasicBlock *Pred : predecessors(BB)) + Result.push_back(std::make_pair(KC, Pred)); return true; } @@ -434,8 +436,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // "X < 4" and "X < 3" is known true but "X < 4" itself is not available. // Perhaps getConstantOnEdge should be smart enough to do this? - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *P = *PI; + for (BasicBlock *P : predecessors(BB)) { // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI); @@ -491,22 +492,17 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // Scan for the sentinel. If we find an undef, force it to the // interesting value: x|undef -> true and x&undef -> false. - for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) - if (LHSVals[i].first == InterestingVal || - isa<UndefValue>(LHSVals[i].first)) { - Result.push_back(LHSVals[i]); - Result.back().first = InterestingVal; - LHSKnownBBs.insert(LHSVals[i].second); + for (const auto &LHSVal : LHSVals) + if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) { + Result.emplace_back(InterestingVal, LHSVal.second); + LHSKnownBBs.insert(LHSVal.second); } - for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) - if (RHSVals[i].first == InterestingVal || - isa<UndefValue>(RHSVals[i].first)) { + for (const auto &RHSVal : RHSVals) + if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) { // If we already inferred a value for this block on the LHS, don't // re-add it. - if (!LHSKnownBBs.count(RHSVals[i].second)) { - Result.push_back(RHSVals[i]); - Result.back().first = InterestingVal; - } + if (!LHSKnownBBs.count(RHSVal.second)) + Result.emplace_back(InterestingVal, RHSVal.second); } return !Result.empty(); @@ -522,8 +518,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, return false; // Invert the known values. - for (unsigned i = 0, e = Result.size(); i != e; ++i) - Result[i].first = ConstantExpr::getNot(Result[i].first); + for (auto &R : Result) + R.first = ConstantExpr::getNot(R.first); return true; } @@ -538,12 +534,12 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, WantInteger, CxtI); // Try to use constant folding to simplify the binary operator. 
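Much of the churn in this and the following hunks is a single idiom: replacing manual pred_iterator loops with the predecessors() range adaptor from llvm/IR/CFG.h (and successors() likewise). A minimal before/after sketch; visit() is a stand-in for the per-predecessor work:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/CFG.h"

    static void visit(llvm::BasicBlock *Pred); // placeholder

    static void walkPreds(llvm::BasicBlock *BB) {
      // Before: for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);
      //              PI != E; ++PI) visit(*PI);
      for (llvm::BasicBlock *Pred : llvm::predecessors(BB))
        visit(Pred);
    }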
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { - Constant *V = LHSVals[i].first; + for (const auto &LHSVal : LHSVals) { + Constant *V = LHSVal.first; Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI); if (Constant *KC = getKnownConstant(Folded, WantInteger)) - Result.push_back(std::make_pair(KC, LHSVals[i].second)); + Result.push_back(std::make_pair(KC, LHSVal.second)); } } @@ -591,8 +587,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) { Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){ - BasicBlock *P = *PI; + for (BasicBlock *P : predecessors(BB)) { // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. LazyValueInfo::Tristate Res = @@ -615,12 +610,12 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, WantInteger, CxtI); - for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { - Constant *V = LHSVals[i].first; + for (const auto &LHSVal : LHSVals) { + Constant *V = LHSVal.first; Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(), V, CmpConst); if (Constant *KC = getKnownConstant(Folded, WantInteger)) - Result.push_back(std::make_pair(KC, LHSVals[i].second)); + Result.push_back(std::make_pair(KC, LHSVal.second)); } return !Result.empty(); @@ -637,8 +632,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, if ((TrueVal || FalseVal) && ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds, WantInteger, CxtI)) { - for (unsigned i = 0, e = Conds.size(); i != e; ++i) { - Constant *Cond = Conds[i].first; + for (auto &C : Conds) { + Constant *Cond = C.first; // Figure out what value to use for the condition. bool KnownCond; @@ -655,7 +650,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // See if the select has a known constant value for this predecessor. if (Constant *Val = KnownCond ? TrueVal : FalseVal) - Result.push_back(std::make_pair(Val, Conds[i].second)); + Result.push_back(std::make_pair(Val, C.second)); } return !Result.empty(); @@ -665,8 +660,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // If all else fails, see if LVI can figure out a constant value for us. Constant *CI = LVI->getConstant(V, BB, CxtI); if (Constant *KC = getKnownConstant(CI, Preference)) { - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - Result.push_back(std::make_pair(KC, *PI)); + for (BasicBlock *Pred : predecessors(BB)) + Result.push_back(std::make_pair(KC, Pred)); } return !Result.empty(); @@ -736,6 +731,9 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { } } + if (TryToUnfoldSelectInCurrBB(BB)) + return true; + // What kind of constant we're looking for. ConstantPreference Preference = WantInteger; @@ -988,10 +986,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // If we got here, the loaded value is transparent through to the start of the // block. Check to see if it is available in any of the predecessor blocks. - for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); - PI != PE; ++PI) { - BasicBlock *PredBB = *PI; - + for (BasicBlock *PredBB : predecessors(LoadBB)) { // If we already scanned this predecessor, skip it. 
if (!PredsScanned.insert(PredBB).second) continue; @@ -1038,13 +1033,11 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { SmallVector<BasicBlock*, 8> PredsToSplit; SmallPtrSet<BasicBlock*, 8> AvailablePredSet; - for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i) - AvailablePredSet.insert(AvailablePreds[i].first); + for (const auto &AvailablePred : AvailablePreds) + AvailablePredSet.insert(AvailablePred.first); // Add all the unavailable predecessors to the PredsToSplit list. - for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); - PI != PE; ++PI) { - BasicBlock *P = *PI; + for (BasicBlock *P : predecessors(LoadBB)) { // If the predecessor is an indirect goto, we can't split the edge. if (isa<IndirectBrInst>(P->getTerminator())) return false; @@ -1129,9 +1122,9 @@ FindMostPopularDest(BasicBlock *BB, // blocks with known and real destinations to threading undef. We'll handle // them later if interesting. DenseMap<BasicBlock*, unsigned> DestPopularity; - for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i) - if (PredToDestList[i].second) - DestPopularity[PredToDestList[i].second]++; + for (const auto &PredToDest : PredToDestList) + if (PredToDest.second) + DestPopularity[PredToDest.second]++; // Find the most popular dest. DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin(); @@ -1194,10 +1187,10 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, "ComputeValueKnownInPredecessors returned true with no values"); DEBUG(dbgs() << "IN BB: " << *BB; - for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { + for (const auto &PredValue : PredValues) { dbgs() << " BB '" << BB->getName() << "': FOUND condition = " - << *PredValues[i].first - << " for pred '" << PredValues[i].second->getName() << "'.\n"; + << *PredValue.first + << " for pred '" << PredValue.second->getName() << "'.\n"; }); // Decide what we want to thread through. Convert our list of known values to @@ -1210,8 +1203,8 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, BasicBlock *OnlyDest = nullptr; BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; - for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { - BasicBlock *Pred = PredValues[i].second; + for (const auto &PredValue : PredValues) { + BasicBlock *Pred = PredValue.second; if (!SeenPreds.insert(Pred).second) continue; // Duplicate predecessor entry. @@ -1220,7 +1213,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, if (isa<IndirectBrInst>(Pred->getTerminator())) continue; - Constant *Val = PredValues[i].first; + Constant *Val = PredValue.first; BasicBlock *DestBB; if (isa<UndefValue>(Val)) @@ -1260,16 +1253,15 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // Now that we know what the most popular destination is, factor all // predecessors that will jump to it into a single predecessor. SmallVector<BasicBlock*, 16> PredsToFactor; - for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i) - if (PredToDestList[i].second == MostPopularDest) { - BasicBlock *Pred = PredToDestList[i].first; + for (const auto &PredToDest : PredToDestList) + if (PredToDest.second == MostPopularDest) { + BasicBlock *Pred = PredToDest.first; // This predecessor may be a switch or something else that has multiple // edges to the block. Factor each of these edges by listing them // according to # occurrences in PredsToFactor. 
- TerminatorInst *PredTI = Pred->getTerminator(); - for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i) - if (PredTI->getSuccessor(i) == BB) + for (BasicBlock *Succ : successors(Pred)) + if (Succ == BB) PredsToFactor.push_back(Pred); } @@ -1366,11 +1358,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { // Scan the information to see which is most popular: true or false. The // predecessors can be of the set true, false, or undef. unsigned NumTrue = 0, NumFalse = 0; - for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) { - if (isa<UndefValue>(XorOpValues[i].first)) + for (const auto &XorOpValue : XorOpValues) { + if (isa<UndefValue>(XorOpValue.first)) // Ignore undefs for the count. continue; - if (cast<ConstantInt>(XorOpValues[i].first)->isZero()) + if (cast<ConstantInt>(XorOpValue.first)->isZero()) ++NumFalse; else ++NumTrue; @@ -1386,12 +1378,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { // Collect all of the blocks that this can be folded into so that we can // factor this once and clone it once. SmallVector<BasicBlock*, 8> BlocksToFoldInto; - for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) { - if (XorOpValues[i].first != SplitVal && - !isa<UndefValue>(XorOpValues[i].first)) + for (const auto &XorOpValue : XorOpValues) { + if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first)) continue; - BlocksToFoldInto.push_back(XorOpValues[i].second); + BlocksToFoldInto.push_back(XorOpValue.second); } // If we inferred a value for all of the predecessors, then duplication won't @@ -1543,10 +1534,10 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // PHI insertion, of which we are prepared to do, clean these up now. SSAUpdater SSAUpdate; SmallVector<Use*, 16> UsesToRename; - for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + for (Instruction &I : *BB) { // Scan all uses of this instruction to see if it is used outside of its // block, and if so, record them in UsesToRename. - for (Use &U : I->uses()) { + for (Use &U : I.uses()) { Instruction *User = cast<Instruction>(U.getUser()); if (PHINode *UserPN = dyn_cast<PHINode>(User)) { if (UserPN->getIncomingBlock(U) == BB) @@ -1561,14 +1552,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, if (UsesToRename.empty()) continue; - DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n"); + DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n"); // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. - SSAUpdate.Initialize(I->getType(), I->getName()); - SSAUpdate.AddAvailableValue(BB, &*I); - SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&*I]); + SSAUpdate.Initialize(I.getType(), I.getName()); + SSAUpdate.AddAvailableValue(BB, &I); + SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]); while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); @@ -1644,10 +1635,10 @@ void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, // Collect updated outgoing edges' frequencies from BB and use them to update // edge probabilities. SmallVector<uint64_t, 4> BBSuccFreq; - for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - auto SuccFreq = (*I == SuccBB) + for (BasicBlock *Succ : successors(BB)) { + auto SuccFreq = (Succ == SuccBB) ? 
BB2SuccBBFreq - NewBBFreq + : BBOrigFreq * BPI->getEdgeProbability(BB, Succ); BBSuccFreq.push_back(SuccFreq.getFrequency()); } @@ -1783,10 +1774,10 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // PHI insertion, of which we are prepared to do, clean these up now. SSAUpdater SSAUpdate; SmallVector<Use*, 16> UsesToRename; - for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + for (Instruction &I : *BB) { // Scan all uses of this instruction to see if it is used outside of its // block, and if so, record them in UsesToRename. - for (Use &U : I->uses()) { + for (Use &U : I.uses()) { Instruction *User = cast<Instruction>(U.getUser()); if (PHINode *UserPN = dyn_cast<PHINode>(User)) { if (UserPN->getIncomingBlock(U) == BB) @@ -1801,14 +1792,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, if (UsesToRename.empty()) continue; - DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n"); + DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n"); // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. - SSAUpdate.Initialize(I->getType(), I->getName()); - SSAUpdate.AddAvailableValue(BB, &*I); - SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&*I]); + SSAUpdate.Initialize(I.getType(), I.getName()); + SSAUpdate.AddAvailableValue(BB, &I); + SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&I]); while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); @@ -1903,3 +1894,62 @@ bool JumpThreading::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { } return false; } + +/// TryToUnfoldSelectInCurrBB - Look for PHI/Select in the same BB of the form +/// bb: +/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ... +/// %s = select p, trueval, falseval +/// +/// And expand the select into a branch structure. This later enables +/// jump-threading over bb in this pass. +/// +/// Using an approach similar to SimplifyCFG::FoldCondBranchOnPHI(), unfold the +/// select if the associated PHI has at least one constant. If the unfolded +/// select is not jump-threaded, it will be folded again in later +/// optimizations. +bool JumpThreading::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { + // If threading this would thread across a loop header, don't thread the edge. + // See the comments above FindLoopHeaders for justifications and caveats. + if (LoopHeaders.count(BB)) + return false; + + // Look for a Phi/Select pair in the same basic block. The Phi feeds the + // condition of the Select and at least one of the incoming values is a + // constant. + for (BasicBlock::iterator BI = BB->begin(); + PHINode *PN = dyn_cast<PHINode>(BI); ++BI) { + unsigned NumPHIValues = PN->getNumIncomingValues(); + if (NumPHIValues == 0 || !PN->hasOneUse()) + continue; + + SelectInst *SI = dyn_cast<SelectInst>(PN->user_back()); + if (!SI || SI->getParent() != BB) + continue; + + Value *Cond = SI->getCondition(); + if (!Cond || Cond != PN || !Cond->getType()->isIntegerTy(1)) + continue; + + bool HasConst = false; + for (unsigned i = 0; i != NumPHIValues; ++i) { + if (PN->getIncomingBlock(i) == BB) + return false; + if (isa<ConstantInt>(PN->getIncomingValue(i))) + HasConst = true; + } + + if (HasConst) { + // Expand the select.
+ TerminatorInst *Term = + SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); + PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); + NewPN->addIncoming(SI->getTrueValue(), Term->getParent()); + NewPN->addIncoming(SI->getFalseValue(), BB); + SI->replaceAllUsesWith(NewPN); + SI->eraseFromParent(); + return true; + } + } + + return false; +} diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp index e01e23f..8923ff7 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp @@ -203,9 +203,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { CurAST = new AliasSetTracker(*AA); // Collect Alias info from subloops. - for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); - LoopItr != LoopItrE; ++LoopItr) { - Loop *InnerL = *LoopItr; + for (Loop *InnerL : L->getSubLoops()) { AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL]; assert(InnerAST && "Where is my AST?"); @@ -227,9 +225,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Because subloops have already been incorporated into AST, we skip blocks in // subloops. // - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) { - BasicBlock *BB = *I; + for (BasicBlock *BB : L->blocks()) { if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. CurAST->add(*BB); // Incorporate the specified basic block } @@ -263,9 +259,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { PredIteratorCache PIC; // Loop over all of the alias sets in the tracker object. - for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); - I != E; ++I) - Changed |= promoteLoopAccessesToScalars(*I, ExitBlocks, InsertPts, + for (AliasSet &AS : *CurAST) + Changed |= promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts, PIC, LI, DT, CurLoop, CurAST, &SafetyInfo); @@ -324,9 +319,9 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // We are processing blocks in reverse dfo, so process children first. const std::vector<DomTreeNode*> &Children = N->getChildren(); - for (unsigned i = 0, e = Children.size(); i != e; ++i) - Changed |= - sinkRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + for (DomTreeNode *Child : Children) + Changed |= sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). if (inSubLoop(BB,CurLoop,LI)) return Changed; @@ -407,9 +402,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, } const std::vector<DomTreeNode*> &Children = N->getChildren(); - for (unsigned i = 0, e = Children.size(); i != e; ++i) - Changed |= - hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + for (DomTreeNode *Child : Children) + Changed |= hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); return Changed; } @@ -499,9 +493,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, // If this call only reads from memory and there are no writes to memory // in the loop, we can hoist or sink the call as appropriate. 
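     // (Editor's illustration, hedged: a loop whose only memory access is a
     // read, e.g.
     //    %n = call i64 @strlen(i8* %s)    ; readonly call
     // with no AliasSet in the tracker flagged Mod, allows the call to be
     // hoisted to the preheader; the scan below looks for any modifying set.)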
bool FoundMod = false; - for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); - I != E; ++I) { - AliasSet &AS = *I; + for (AliasSet &AS : *CurAST) { if (!AS.isForwardingAliasSet() && AS.isMod()) { FoundMod = true; break; @@ -783,8 +775,8 @@ static bool isGuaranteedToExecute(const Instruction &Inst, CurLoop->getExitBlocks(ExitBlocks); // Verify that the block dominates each of the exit blocks of the loop. - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (!DT->dominates(Inst.getParent(), ExitBlocks[i])) + for (BasicBlock *ExitBlock : ExitBlocks) + if (!DT->dominates(Inst.getParent(), ExitBlock)) return false; // As a degenerate case, if the loop is statically infinite then we haven't @@ -951,17 +943,17 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, // If there is an non-load/store instruction in the loop, we can't promote // it. - if (const LoadInst *load = dyn_cast<LoadInst>(UI)) { - assert(!load->isVolatile() && "AST broken"); - if (!load->isSimple()) + if (const LoadInst *Load = dyn_cast<LoadInst>(UI)) { + assert(!Load->isVolatile() && "AST broken"); + if (!Load->isSimple()) return Changed; - } else if (const StoreInst *store = dyn_cast<StoreInst>(UI)) { + } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. if (UI->getOperand(1) != ASIV) continue; - assert(!store->isVolatile() && "AST broken"); - if (!store->isSimple()) + assert(!Store->isVolatile() && "AST broken"); + if (!Store->isSimple()) return Changed; // Don't sink stores from loops without dedicated block exits. Exits // containing indirect branches are not transformed by loop simplify, @@ -979,7 +971,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, // restrictive (and performant) alignment and if we are sure this // instruction will be executed, update the alignment. // Larger is better, with the exception of 0 being the best alignment. - unsigned InstAlignment = store->getAlignment(); + unsigned InstAlignment = Store->getAlignment(); if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0) if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) { GuaranteedToExecute = true; diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index bc00ff3..7b1940b 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -245,7 +245,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) { loopInfo.removeBlock(BB); // The last step is to update LoopInfo now that we've eliminated this loop. 
-  loopInfo.updateUnloop(L);
+  loopInfo.markAsRemoved(L);
 
   Changed = true;
   ++NumDeleted;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 56ae5c0..ecef6db 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -39,16 +39,16 @@ using namespace llvm;
 #define DEBUG_TYPE "loop-unroll"
 
 static cl::opt<unsigned>
-    UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
+    UnrollThreshold("unroll-threshold", cl::Hidden,
                     cl::desc("The baseline cost threshold for loop unrolling"));
 
 static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold(
-    "unroll-percent-dynamic-cost-saved-threshold", cl::init(20), cl::Hidden,
+    "unroll-percent-dynamic-cost-saved-threshold", cl::Hidden,
    cl::desc("The percentage of estimated dynamic cost which must be saved by "
             "unrolling to allow unrolling up to the max threshold."));
 
 static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount(
-    "unroll-dynamic-cost-savings-discount", cl::init(2000), cl::Hidden,
+    "unroll-dynamic-cost-savings-discount", cl::Hidden,
     cl::desc("This is the amount discounted from the total unroll cost when "
              "the unrolled form has a high dynamic cost savings (triggered by "
            "the '-unroll-percent-dynamic-cost-saved-threshold' flag.)"));
 
@@ -59,17 +59,17 @@ static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
    "iterations when checking full unroll profitability"));
 
 static cl::opt<unsigned>
-UnrollCount("unroll-count", cl::init(0), cl::Hidden,
+UnrollCount("unroll-count", cl::Hidden,
   cl::desc("Use this unroll count for all loops including those with "
            "unroll_count pragma values, for testing purposes"));
 
 static cl::opt<bool>
-UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
+UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
   cl::desc("Allows loops to be partially unrolled until "
            "-unroll-threshold loop size is reached."));
 
 static cl::opt<bool>
-UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
+UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
   cl::desc("Unroll loops with run-time trip counts"));
 
 static cl::opt<unsigned>
@@ -77,182 +77,95 @@ PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
   cl::desc("Unrolled size limit for loops with an unroll(full) or "
            "unroll_count pragma."));
 
-namespace {
-  class LoopUnroll : public LoopPass {
-  public:
-    static char ID; // Pass ID, replacement for typeid
-    LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
-      CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
-      CurrentPercentDynamicCostSavedThreshold =
-          UnrollPercentDynamicCostSavedThreshold;
-      CurrentDynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
-      CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
-      CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
-      CurrentRuntime = (R == -1) ?
UnrollRuntime : (bool)R; - - UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); - UserPercentDynamicCostSavedThreshold = - (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0); - UserDynamicCostSavingsDiscount = - (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0); - UserAllowPartial = (P != -1) || - (UnrollAllowPartial.getNumOccurrences() > 0); - UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0); - UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0); - - initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); - } - /// A magic value for use with the Threshold parameter to indicate - /// that the loop unroll should be performed regardless of how much - /// code expansion would result. - static const unsigned NoThreshold = UINT_MAX; - - // Threshold to use when optsize is specified (and there is no - // explicit -unroll-threshold). - static const unsigned OptSizeUnrollThreshold = 50; - - // Default unroll count for loops with run-time trip count if - // -unroll-count is not set - static const unsigned UnrollRuntimeCount = 8; - - unsigned CurrentCount; - unsigned CurrentThreshold; - unsigned CurrentPercentDynamicCostSavedThreshold; - unsigned CurrentDynamicCostSavingsDiscount; - bool CurrentAllowPartial; - bool CurrentRuntime; - - // Flags for whether the 'current' settings are user-specified. - bool UserCount; - bool UserThreshold; - bool UserPercentDynamicCostSavedThreshold; - bool UserDynamicCostSavingsDiscount; - bool UserAllowPartial; - bool UserRuntime; - - bool runOnLoop(Loop *L, LPPassManager &) override; - - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG... - /// - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfoWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); - AU.addRequiredID(LoopSimplifyID); - AU.addPreservedID(LoopSimplifyID); - AU.addRequiredID(LCSSAID); - AU.addPreservedID(LCSSAID); - AU.addRequired<ScalarEvolutionWrapperPass>(); - AU.addPreserved<ScalarEvolutionWrapperPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. - // If loop unroll does not preserve dom info then LCSSA pass on next - // loop will receive invalid dom info. - // For now, recreate dom info, if loop is unrolled. - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - } +/// A magic value for use with the Threshold parameter to indicate +/// that the loop unroll should be performed regardless of how much +/// code expansion would result. +static const unsigned NoThreshold = UINT_MAX; - // Fill in the UnrollingPreferences parameter with values from the - // TargetTransformationInfo. 
- void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI, - TargetTransformInfo::UnrollingPreferences &UP) { - UP.Threshold = CurrentThreshold; - UP.PercentDynamicCostSavedThreshold = - CurrentPercentDynamicCostSavedThreshold; - UP.DynamicCostSavingsDiscount = CurrentDynamicCostSavingsDiscount; - UP.OptSizeThreshold = OptSizeUnrollThreshold; - UP.PartialThreshold = CurrentThreshold; - UP.PartialOptSizeThreshold = OptSizeUnrollThreshold; - UP.Count = CurrentCount; - UP.MaxCount = UINT_MAX; - UP.Partial = CurrentAllowPartial; - UP.Runtime = CurrentRuntime; - UP.AllowExpensiveTripCount = false; - TTI.getUnrollingPreferences(L, UP); - } +/// Default unroll count for loops with run-time trip count if +/// -unroll-count is not set +static const unsigned DefaultUnrollRuntimeCount = 8; - // Select and return an unroll count based on parameters from - // user, unroll preferences, unroll pragmas, or a heuristic. - // SetExplicitly is set to true if the unroll count is is set by - // the user or a pragma rather than selected heuristically. - unsigned - selectUnrollCount(const Loop *L, unsigned TripCount, bool PragmaFullUnroll, - unsigned PragmaCount, - const TargetTransformInfo::UnrollingPreferences &UP, - bool &SetExplicitly); - - // Select threshold values used to limit unrolling based on a - // total unrolled size. Parameters Threshold and PartialThreshold - // are set to the maximum unrolled size for fully and partially - // unrolled loops respectively. - void selectThresholds(const Loop *L, bool UsePragmaThreshold, - const TargetTransformInfo::UnrollingPreferences &UP, - unsigned &Threshold, unsigned &PartialThreshold, - unsigned &PercentDynamicCostSavedThreshold, - unsigned &DynamicCostSavingsDiscount) { - // Determine the current unrolling threshold. While this is - // normally set from UnrollThreshold, it is overridden to a - // smaller value if the current function is marked as - // optimize-for-size, and the unroll threshold was not user - // specified. - Threshold = UserThreshold ? CurrentThreshold : UP.Threshold; - PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold; - PercentDynamicCostSavedThreshold = - UserPercentDynamicCostSavedThreshold - ? CurrentPercentDynamicCostSavedThreshold - : UP.PercentDynamicCostSavedThreshold; - DynamicCostSavingsDiscount = UserDynamicCostSavingsDiscount - ? CurrentDynamicCostSavingsDiscount - : UP.DynamicCostSavingsDiscount; - - if (!UserThreshold && - // FIXME: Use Function::optForSize(). - L->getHeader()->getParent()->hasFnAttribute( - Attribute::OptimizeForSize)) { - Threshold = UP.OptSizeThreshold; - PartialThreshold = UP.PartialOptSizeThreshold; - } - if (UsePragmaThreshold) { - // If the loop has an unrolling pragma, we want to be more - // aggressive with unrolling limits. Set thresholds to at - // least the PragmaTheshold value which is larger than the - // default limits. - if (Threshold != NoThreshold) - Threshold = std::max<unsigned>(Threshold, PragmaUnrollThreshold); - if (PartialThreshold != NoThreshold) - PartialThreshold = - std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold); - } - } - bool canUnrollCompletely(Loop *L, unsigned Threshold, - unsigned PercentDynamicCostSavedThreshold, - unsigned DynamicCostSavingsDiscount, - uint64_t UnrolledCost, uint64_t RolledDynamicCost); - }; -} +/// Gather the various unrolling parameters based on the defaults, compiler +/// flags, TTI overrides, pragmas, and user specified parameters. 
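+///
+/// Illustration (editor's sketch, not normative): the layers below apply in
+/// order - defaults, then TTI overrides, size attributes, pragmas, cl::opt
+/// flags, and finally the explicitly provided parameters - so a loop under
+/// "#pragma clang loop unroll_count(4)" ends up with UP.Count == 4 unless
+/// -unroll-count or a constructor-supplied count overrides it.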
+static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
+    Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
+    Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
+    Optional<bool> UserRuntime, unsigned PragmaCount, bool PragmaFullUnroll,
+    bool PragmaEnableUnroll, unsigned TripCount) {
+  TargetTransformInfo::UnrollingPreferences UP;
 
-char LoopUnroll::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+  // Set up the defaults
+  UP.Threshold = 150;
+  UP.PercentDynamicCostSavedThreshold = 20;
+  UP.DynamicCostSavingsDiscount = 2000;
+  UP.OptSizeThreshold = 50;
+  UP.PartialThreshold = UP.Threshold;
+  UP.PartialOptSizeThreshold = UP.OptSizeThreshold;
+  UP.Count = 0;
+  UP.MaxCount = UINT_MAX;
+  UP.Partial = false;
+  UP.Runtime = false;
+  UP.AllowExpensiveTripCount = false;
+
+  // Override with any target specific settings
+  TTI.getUnrollingPreferences(L, UP);
+
+  // Apply size attributes
+  if (L->getHeader()->getParent()->optForSize()) {
+    UP.Threshold = UP.OptSizeThreshold;
+    UP.PartialThreshold = UP.PartialOptSizeThreshold;
+  }
 
-Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
-                                 int Runtime) {
-  return new LoopUnroll(Threshold, Count, AllowPartial, Runtime);
-}
+  // Apply unroll count pragmas
+  if (PragmaCount)
+    UP.Count = PragmaCount;
+  else if (PragmaFullUnroll)
+    UP.Count = TripCount;
 
-Pass *llvm::createSimpleLoopUnrollPass() {
-  return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+  // Apply any user values specified by cl::opt
+  if (UnrollThreshold.getNumOccurrences() > 0) {
+    UP.Threshold = UnrollThreshold;
+    UP.PartialThreshold = UnrollThreshold;
+  }
+  if (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0)
+    UP.PercentDynamicCostSavedThreshold =
+        UnrollPercentDynamicCostSavedThreshold;
+  if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0)
+    UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
+  if (UnrollCount.getNumOccurrences() > 0)
+    UP.Count = UnrollCount;
+  if (UnrollAllowPartial.getNumOccurrences() > 0)
+    UP.Partial = UnrollAllowPartial;
+  if (UnrollRuntime.getNumOccurrences() > 0)
+    UP.Runtime = UnrollRuntime;
+
+  // Apply user values provided by argument
+  if (UserThreshold.hasValue()) {
+    UP.Threshold = *UserThreshold;
+    UP.PartialThreshold = *UserThreshold;
+  }
+  if (UserCount.hasValue())
+    UP.Count = *UserCount;
+  if (UserAllowPartial.hasValue())
+    UP.Partial = *UserAllowPartial;
+  if (UserRuntime.hasValue())
+    UP.Runtime = *UserRuntime;
+
+  if (PragmaCount > 0 ||
+      ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0)) {
+    // If the loop has an unrolling pragma, we want to be more aggressive with
+    // unrolling limits. Set thresholds to at least the PragmaThreshold value,
+    // which is larger than the default limits.
+ if (UP.Threshold != NoThreshold) + UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold); + if (UP.PartialThreshold != NoThreshold) + UP.PartialThreshold = + std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold); + } + + return UP; } namespace { @@ -793,12 +706,11 @@ static void SetLoopAlreadyUnrolled(Loop *L) { L->setLoopID(NewLoopID); } -bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold, - unsigned PercentDynamicCostSavedThreshold, - unsigned DynamicCostSavingsDiscount, - uint64_t UnrolledCost, - uint64_t RolledDynamicCost) { - +static bool canUnrollCompletely(Loop *L, unsigned Threshold, + unsigned PercentDynamicCostSavedThreshold, + unsigned DynamicCostSavingsDiscount, + uint64_t UnrolledCost, + uint64_t RolledDynamicCost) { if (Threshold == NoThreshold) { DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n"); return true; @@ -846,60 +758,13 @@ bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold, return false; } -unsigned LoopUnroll::selectUnrollCount( - const Loop *L, unsigned TripCount, bool PragmaFullUnroll, - unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP, - bool &SetExplicitly) { - SetExplicitly = true; - - // User-specified count (either as a command-line option or - // constructor parameter) has highest precedence. - unsigned Count = UserCount ? CurrentCount : 0; - - // If there is no user-specified count, unroll pragmas have the next - // highest precedence. - if (Count == 0) { - if (PragmaCount) { - Count = PragmaCount; - } else if (PragmaFullUnroll) { - Count = TripCount; - } - } - - if (Count == 0) - Count = UP.Count; - - if (Count == 0) { - SetExplicitly = false; - if (TripCount == 0) - // Runtime trip count. - Count = UnrollRuntimeCount; - else - // Conservative heuristic: if we know the trip count, see if we can - // completely unroll (subject to the threshold, checked below); otherwise - // try to find greatest modulo of the trip count which is still under - // threshold value. 
- Count = TripCount; - } - if (TripCount && Count > TripCount) - return TripCount; - return Count; -} - -bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { - if (skipOptnoneFunction(L)) - return false; - - Function &F = *L->getHeader()->getParent(); - - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - const TargetTransformInfo &TTI = - getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); - +static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, + ScalarEvolution *SE, const TargetTransformInfo &TTI, + AssumptionCache &AC, bool PreserveLCSSA, + Optional<unsigned> ProvidedCount, + Optional<unsigned> ProvidedThreshold, + Optional<bool> ProvidedAllowPartial, + Optional<bool> ProvidedRuntime) { BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() << "] Loop %" << Header->getName() << "\n"); @@ -912,9 +777,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { unsigned PragmaCount = UnrollCountPragmaValue(L); bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0; - TargetTransformInfo::UnrollingPreferences UP; - getUnrollingPreferences(L, TTI, UP); - // Find trip count and trip multiple if count is not available unsigned TripCount = 0; unsigned TripMultiple = 1; @@ -929,11 +791,18 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); } - // Select an initial unroll count. This may be reduced later based - // on size thresholds. - bool CountSetExplicitly; - unsigned Count = selectUnrollCount(L, TripCount, PragmaFullUnroll, - PragmaCount, UP, CountSetExplicitly); + TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( + L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, + ProvidedRuntime, PragmaCount, PragmaFullUnroll, PragmaEnableUnroll, + TripCount); + + unsigned Count = UP.Count; + bool CountSetExplicitly = Count != 0; + // Use a heuristic count if we didn't set anything explicitly. + if (!CountSetExplicitly) + Count = TripCount == 0 ? DefaultUnrollRuntimeCount : TripCount; + if (TripCount && Count > TripCount) + Count = TripCount; unsigned NumInlineCandidates; bool notDuplicatable; @@ -955,21 +824,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { return false; } - unsigned Threshold, PartialThreshold; - unsigned PercentDynamicCostSavedThreshold; - unsigned DynamicCostSavingsDiscount; - // Only use the high pragma threshold when we have a target unroll factor such - // as with "#pragma unroll N" or a pragma indicating full unrolling and the - // trip count is known. Otherwise we rely on the standard threshold to - // heuristically select a reasonable unroll count. - bool UsePragmaThreshold = - PragmaCount > 0 || - ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0); - - selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold, - PercentDynamicCostSavedThreshold, - DynamicCostSavingsDiscount); - // Given Count, TripCount and thresholds determine the type of // unrolling which is to be performed. 
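   // (Editor's illustration, hedged: with TripCount == 8 and Count == 8 the
   // loop is a candidate for Full unrolling; with Count == 4 it is Partial;
   // with an unknown trip count only Runtime unrolling remains.)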
enum { Full = 0, Partial = 1, Runtime = 2 }; @@ -977,19 +831,20 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { if (TripCount && Count == TripCount) { Unrolling = Partial; // If the loop is really small, we don't need to run an expensive analysis. - if (canUnrollCompletely(L, Threshold, 100, DynamicCostSavingsDiscount, + if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount, UnrolledSize, UnrolledSize)) { Unrolling = Full; } else { // The loop isn't that small, but we still can fully unroll it if that // helps to remove a significant number of instructions. // To check that, run additional analysis on the loop. - if (Optional<EstimatedUnrollCost> Cost = - analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI, - Threshold + DynamicCostSavingsDiscount)) - if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold, - DynamicCostSavingsDiscount, Cost->UnrolledCost, - Cost->RolledDynamicCost)) { + if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost( + L, TripCount, DT, *SE, TTI, + UP.Threshold + UP.DynamicCostSavingsDiscount)) + if (canUnrollCompletely(L, UP.Threshold, + UP.PercentDynamicCostSavedThreshold, + UP.DynamicCostSavingsDiscount, + Cost->UnrolledCost, Cost->RolledDynamicCost)) { Unrolling = Full; } } @@ -1001,23 +856,22 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { // Reduce count based on the type of unrolling and the threshold values. unsigned OriginalCount = Count; - bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || - (UserRuntime ? CurrentRuntime : UP.Runtime); + bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || UP.Runtime; // Don't unroll a runtime trip count loop with unroll full pragma. if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) { AllowRuntime = false; } if (Unrolling == Partial) { - bool AllowPartial = PragmaEnableUnroll || - (UserAllowPartial ? CurrentAllowPartial : UP.Partial); + bool AllowPartial = PragmaEnableUnroll || UP.Partial; if (!AllowPartial && !CountSetExplicitly) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; } - if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) { + if (UP.PartialThreshold != NoThreshold && + UnrolledSize > UP.PartialThreshold) { // Reduce unroll count to be modulo of TripCount for partial unrolling. - Count = (std::max(PartialThreshold, 3u)-2) / (LoopSize-2); + Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2); while (Count != 0 && TripCount % Count != 0) Count--; } @@ -1029,7 +883,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { } // Reduce unroll count to be the largest power-of-two factor of // the original count which satisfies the threshold limit. 
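     // (Editor's illustration: with LoopSize == 10, UnrolledSize is
     // (10 - 2) * Count + 2, so under UP.PartialThreshold == 34 a Count of 16
     // is halved 16 -> 8 -> 4 until 8 * 4 + 2 = 34 fits the threshold.)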
- while (Count != 0 && UnrolledSize > PartialThreshold) { + while (Count != 0 && UnrolledSize > UP.PartialThreshold) { Count >>= 1; UnrolledSize = (LoopSize-2) * Count + 2; } @@ -1086,3 +940,91 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { return true; } + +namespace { +class LoopUnroll : public LoopPass { +public: + static char ID; // Pass ID, replacement for typeid + LoopUnroll(Optional<unsigned> Threshold = None, + Optional<unsigned> Count = None, + Optional<bool> AllowPartial = None, Optional<bool> Runtime = None) + : LoopPass(ID), ProvidedCount(Count), ProvidedThreshold(Threshold), + ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime) { + initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); + } + + Optional<unsigned> ProvidedCount; + Optional<unsigned> ProvidedThreshold; + Optional<bool> ProvidedAllowPartial; + Optional<bool> ProvidedRuntime; + + bool runOnLoop(Loop *L, LPPassManager &) override { + if (skipOptnoneFunction(L)) + return false; + + Function &F = *L->getHeader()->getParent(); + + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + const TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + + return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, PreserveLCSSA, ProvidedCount, + ProvidedThreshold, ProvidedAllowPartial, + ProvidedRuntime); + } + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG... + /// + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addRequired<ScalarEvolutionWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. + // If loop unroll does not preserve dom info then LCSSA pass on next + // loop will receive invalid dom info. + // For now, recreate dom info, if loop is unrolled. + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; +} + +char LoopUnroll::ID = 0; +INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) + +Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial, + int Runtime) { + // TODO: It would make more sense for this function to take the optionals + // directly, but that's dangerous since it would silently break out of tree + // callers. + return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold), + Count == -1 ? 
None : Optional<unsigned>(Count),
+                        AllowPartial == -1 ? None
+                                           : Optional<bool>(AllowPartial),
+                        Runtime == -1 ? None : Optional<bool>(Runtime));
+}
+
+Pass *llvm::createSimpleLoopUnrollPass() {
+  return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 7354016..6b43b0f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -529,12 +529,13 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
 
       // We found an instruction that may write to the loaded memory.
       // We can try to promote at this position instead of the store
-      // position if nothing alias the store memory after this.
+      // position if nothing aliases the store memory after this and the store
+      // destination is not in the range.
       P = &*I;
       for (; I != E; ++I) {
         MemoryLocation StoreLoc = MemoryLocation::get(SI);
-        if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
-          DEBUG(dbgs() << "Alias " << *I << "\n");
+        if (&*I == SI->getOperand(1) ||
+            AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
           P = nullptr;
           break;
         }
@@ -628,13 +629,39 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   // Ensure that the value being stored is something that can be memset'able a
   // byte at a time like "0" or "-1" or any width, as well as things like
   // 0xA0A0A0A0 and 0.0.
-  if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
+  auto *V = SI->getOperand(0);
+  if (Value *ByteVal = isBytewiseValue(V)) {
     if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
                                               ByteVal)) {
       BBI = I->getIterator(); // Don't invalidate iterator.
       return true;
     }
 
+    // If we have an aggregate, we try to promote it to memset regardless
+    // of opportunity for merging as it can expose optimization opportunities
+    // in subsequent passes.
+    auto *T = V->getType();
+    if (T->isAggregateType()) {
+      uint64_t Size = DL.getTypeStoreSize(T);
+      unsigned Align = SI->getAlignment();
+      if (!Align)
+        Align = DL.getABITypeAlignment(T);
+      IRBuilder<> Builder(SI);
+      auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal,
+                                     Size, Align, SI->isVolatile());
+
+      DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
+
+      MD->removeInstruction(SI);
+      SI->eraseFromParent();
+      NumMemSetInfer++;
+
+      // Make sure we do not invalidate the iterator.
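+      // (Editor's illustration, hedged: this promotion turns e.g.
+      //    store %struct.S zeroinitializer, %struct.S* %p
+      // into a call to @llvm.memset.p0i8.i64 covering the full store size,
+      // which later passes can merge or lower more effectively.)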
+      BBI = M->getIterator();
+      return true;
+    }
+  }
+
   return false;
 }
 
diff --git a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 28c610c..b56b355 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -499,7 +499,7 @@ static bool isGCSafepointPoll(Function &F) {
 static bool shouldRewriteFunction(Function &F) {
   // TODO: This should check the GCStrategy
   if (F.hasGC()) {
-    const char *FunctionGCName = F.getGC();
+    const auto &FunctionGCName = F.getGC();
     const StringRef StatepointExampleName("statepoint-example");
     const StringRef CoreCLRName("coreclr");
     return (StatepointExampleName == FunctionGCName) ||
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 401a740..bcadd4e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -2155,7 +2155,8 @@ void Reassociate::OptimizeInst(Instruction *I) {
       // During the initial run we will get to the root of the tree.
       // But if we get here while we are redoing instructions, there is no
      // guarantee that the root will be visited. So Redo later
-      if (BO->user_back() != BO)
+      if (BO->user_back() != BO &&
+          BO->getParent() == BO->user_back()->getParent())
         RedoInsts.insert(BO->user_back());
       return;
     }
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 5d253be..d77d574 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -78,6 +78,13 @@ static cl::opt<bool>
 AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
                                cl::Hidden, cl::init(true));
 
+/// Should we split vectors of pointers into their individual elements?  This
+/// is known to be buggy, but the alternate implementation isn't yet ready.
+/// This is purely to provide a debugging and diagnostic hook until the vector
+/// split is replaced with vector relocations.
+static cl::opt<bool> UseVectorSplit("rs4gc-split-vector-values", cl::Hidden,
+                                    cl::init(true));
+
 namespace {
 struct RewriteStatepointsForGC : public ModulePass {
   static char ID; // Pass identification, replacement for typeid
@@ -357,10 +364,6 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I);
 /// particular element in 'I'.
 static BaseDefiningValueResult
 findBaseDefiningValueOfVector(Value *I) {
-  assert(I->getType()->isVectorTy() &&
-         cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
-         "Illegal to ask for the base pointer of a non-pointer type");
-
   // Each case parallels findBaseDefiningValue below, see that code for
   // detailed motivation.
 
@@ -368,26 +371,10 @@ findBaseDefiningValueOfVector(Value *I) {
     // An incoming argument to the function is a base pointer
     return BaseDefiningValueResult(I, true);
 
-  // We shouldn't see the address of a global as a vector value?
-  assert(!isa<GlobalVariable>(I) &&
-         "unexpected global variable found in base of vector");
-
-  // inlining could possibly introduce phi node that contains
-  // undef if callee has multiple returns
-  if (isa<UndefValue>(I))
-    // utterly meaningless, but useful for dealing with partially optimized
-    // code.
+  if (isa<Constant>(I))
+    // Constant vectors consist only of constant pointers.
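+    // (Editor's illustration: e.g. <2 x i8 addrspace(1)*> zeroinitializer,
+    // or an undef vector, is its own base - every lane is a constant pointer
+    // the collector never needs to relocate.)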
return BaseDefiningValueResult(I, true); - // Due to inheritance, this must be _after_ the global variable and undef - // checks - if (Constant *Con = dyn_cast<Constant>(I)) { - assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && - "order of checks wrong!"); - assert(Con->isNullValue() && "null is the only case which makes sense"); - return BaseDefiningValueResult(Con, true); - } - if (isa<LoadInst>(I)) return BaseDefiningValueResult(I, true); @@ -417,11 +404,11 @@ findBaseDefiningValueOfVector(Value *I) { /// (i.e. a PHI or Select of two derived pointers), or c) involves a change /// from pointer to vector type or back. static BaseDefiningValueResult findBaseDefiningValue(Value *I) { + assert(I->getType()->isPtrOrPtrVectorTy() && + "Illegal to ask for the base pointer of a non-pointer type"); + if (I->getType()->isVectorTy()) return findBaseDefiningValueOfVector(I); - - assert(I->getType()->isPointerTy() && - "Illegal to ask for the base pointer of a non-pointer type"); if (isa<Argument>(I)) // An incoming argument to the function is a base pointer @@ -1310,18 +1297,29 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables, assert(Index < LiveVec.size() && "Bug in std::find?"); return Index; }; - - // All gc_relocate are set to i8 addrspace(1)* type. We originally generated - // unique declarations for each pointer type, but this proved problematic - // because the intrinsic mangling code is incomplete and fragile. Since - // we're moving towards a single unified pointer type anyways, we can just - // cast everything to an i8* of the right address space. A bitcast is added - // later to convert gc_relocate to the actual value's type. Module *M = StatepointToken->getModule(); - auto AS = cast<PointerType>(LiveVariables[0]->getType())->getAddressSpace(); - Type *Types[] = {Type::getInt8PtrTy(M->getContext(), AS)}; - Value *GCRelocateDecl = - Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types); + + // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose + // element type is i8 addrspace(1)*). We originally generated unique + // declarations for each pointer type, but this proved problematic because + // the intrinsic mangling code is incomplete and fragile. Since we're moving + // towards a single unified pointer type anyways, we can just cast everything + // to an i8* of the right address space. A bitcast is added later to convert + // gc_relocate to the actual value's type. + auto getGCRelocateDecl = [&] (Type *Ty) { + assert(isHandledGCPointerType(Ty)); + auto AS = Ty->getScalarType()->getPointerAddressSpace(); + Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS); + if (auto *VT = dyn_cast<VectorType>(Ty)) + NewTy = VectorType::get(NewTy, VT->getNumElements()); + return Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, + {NewTy}); + }; + + // Lazily populated map from input types to the canonicalized form mentioned + // in the comment above. This should probably be cached somewhere more + // broadly. 
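+  // (Editor's illustration, hedged: a live value of type
+  // <4 x %obj addrspace(1)*> - %obj being a hypothetical type - maps to a
+  // gc.relocate declaration returning <4 x i8 addrspace(1)*>, while a scalar
+  // pointer maps to one returning i8 addrspace(1)*.)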
+  DenseMap<Type*, Value*> TypeToDeclMap;
 
   for (unsigned i = 0; i < LiveVariables.size(); i++) {
     // Generate the gc.relocate call and save the result
@@ -1329,6 +1327,11 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
         Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i]));
     Value *LiveIdx = Builder.getInt32(LiveStart + i);
 
+    Type *Ty = LiveVariables[i]->getType();
+    if (!TypeToDeclMap.count(Ty))
+      TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
+    Value *GCRelocateDecl = TypeToDeclMap[Ty];
+
     // only specify a debug name if we can give a useful one
     CallInst *Reloc = Builder.CreateCall(
         GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
@@ -2380,15 +2383,19 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
 
   // Do a limited scalarization of any live at safepoint vector values which
   // contain pointers.  This enables this pass to run after vectorization at
-  // the cost of some possible performance loss.  TODO: it would be nice to
-  // natively support vectors all the way through the backend so we don't need
-  // to scalarize here.
-  for (size_t i = 0; i < Records.size(); i++) {
-    PartiallyConstructedSafepointRecord &Info = Records[i];
-    Instruction *Statepoint = ToUpdate[i].getInstruction();
-    splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
-                      Info.PointerToBase, DT);
-  }
+  // the cost of some possible performance loss.  Note: this is known not to
+  // handle updating of the side tables correctly, which can lead to
+  // relocation bugs when the same vector is live at multiple statepoints.
+  // We're in the process of implementing the alternate lowering - relocating
+  // the vector-of-pointers as a first-class item and updating the backend to
+  // understand that - but that's not yet complete.
+  if (UseVectorSplit)
+    for (size_t i = 0; i < Records.size(); i++) {
+      PartiallyConstructedSafepointRecord &Info = Records[i];
+      Instruction *Statepoint = ToUpdate[i].getInstruction();
+      splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
+                        Info.PointerToBase, DT);
+    }
 
   // In order to reduce live set of statepoint we might choose to rematerialize
   // some values instead of relocating them.
This is purely an optimization and @@ -2467,7 +2474,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, #ifndef NDEBUG // sanity check for (auto *Ptr : Live) - assert(isGCPointerType(Ptr->getType()) && "must be a gc pointer type"); + assert(isHandledGCPointerType(Ptr->getType()) && + "must be a gc pointer type"); #endif relocationViaAlloca(F, DT, Live, Records); @@ -2547,7 +2555,7 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) { static bool shouldRewriteStatepointsIn(Function &F) { // TODO: This should check the GCStrategy if (F.hasGC()) { - const char *FunctionGCName = F.getGC(); + const auto &FunctionGCName = F.getGC(); const StringRef StatepointExampleName("statepoint-example"); const StringRef CoreCLRName("coreclr"); return (StatepointExampleName == FunctionGCName) || diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp index 2fca803..8569e08 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -757,9 +757,14 @@ void SCCPSolver::visitCastInst(CastInst &I) { LatticeVal OpSt = getValueState(I.getOperand(0)); if (OpSt.isOverdefined()) // Inherit overdefinedness of operand markOverdefined(&I); - else if (OpSt.isConstant()) // Propagate constant value - markConstant(&I, ConstantExpr::getCast(I.getOpcode(), - OpSt.getConstant(), I.getType())); + else if (OpSt.isConstant()) { + Constant *C = + ConstantExpr::getCast(I.getOpcode(), OpSt.getConstant(), I.getType()); + if (isa<UndefValue>(C)) + return; + // Propagate constant value + markConstant(&I, C); + } } @@ -859,10 +864,14 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { LatticeVal &IV = ValueState[&I]; if (IV.isOverdefined()) return; - if (V1State.isConstant() && V2State.isConstant()) - return markConstant(IV, &I, - ConstantExpr::get(I.getOpcode(), V1State.getConstant(), - V2State.getConstant())); + if (V1State.isConstant() && V2State.isConstant()) { + Constant *C = ConstantExpr::get(I.getOpcode(), V1State.getConstant(), + V2State.getConstant()); + // X op Y -> undef. + if (isa<UndefValue>(C)) + return; + return markConstant(IV, &I, C); + } // If something is undef, wait for it to resolve. if (!V1State.isOverdefined() && !V2State.isOverdefined()) @@ -917,10 +926,13 @@ void SCCPSolver::visitCmpInst(CmpInst &I) { LatticeVal &IV = ValueState[&I]; if (IV.isOverdefined()) return; - if (V1State.isConstant() && V2State.isConstant()) - return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(), - V1State.getConstant(), - V2State.getConstant())); + if (V1State.isConstant() && V2State.isConstant()) { + Constant *C = ConstantExpr::getCompare( + I.getPredicate(), V1State.getConstant(), V2State.getConstant()); + if (isa<UndefValue>(C)) + return; + return markConstant(IV, &I, C); + } // If operands are still undefined, wait for it to resolve. 
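   // (Editor's illustration: e.g. icmp eq i32 undef, 0 constant-folds to
   // undef; marking the result constant here would pin the lattice before
   // ResolvedUndefsIn has a chance to pick a value, hence the early return
   // above.)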
if (!V1State.isOverdefined() && !V2State.isOverdefined()) @@ -1020,8 +1032,11 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) { Constant *Ptr = Operands[0]; auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end()); - markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, - Indices)); + Constant *C = + ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices); + if (isa<UndefValue>(C)) + return; + markConstant(&I, C); } void SCCPSolver::visitStoreInst(StoreInst &SI) { @@ -1061,9 +1076,9 @@ void SCCPSolver::visitLoadInst(LoadInst &I) { Constant *Ptr = PtrVal.getConstant(); - // load null -> null + // load null is undefined. if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0) - return markConstant(IV, &I, UndefValue::get(I.getType())); + return; // Transform load (constant global) into the value loaded. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { @@ -1079,8 +1094,11 @@ void SCCPSolver::visitLoadInst(LoadInst &I) { } // Transform load from a constant into a constant if possible. - if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL)) + if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL)) { + if (isa<UndefValue>(C)) + return; return markConstant(IV, &I, C); + } // Otherwise we cannot say for certain what value this load will produce. // Bail out. @@ -1122,8 +1140,12 @@ CallOverdefined: // If we can constant fold this, mark the result of the call as a // constant. - if (Constant *C = ConstantFoldCall(F, Operands, TLI)) + if (Constant *C = ConstantFoldCall(F, Operands, TLI)) { + // call -> undef. + if (isa<UndefValue>(C)) + return; return markConstant(I, C); + } } // Otherwise, we don't know anything about this call, mark it overdefined. @@ -1379,6 +1401,11 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // X % undef -> undef. No change. if (Op1LV.isUndefined()) break; + // X / 0 -> undef. No change. + // X % 0 -> undef. No change. + if (Op1LV.isConstant() && Op1LV.getConstant()->isZeroValue()) + break; + // undef / X -> 0. X could be maxint. // undef % X -> 0. X could be 1. markForcedConstant(&I, Constant::getNullValue(ITy)); diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 0e0b00d..4e84d72 100644 --- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -425,9 +425,7 @@ bool TailCallElim::runTRE(Function &F) { // with themselves. Check to see if we did and clean up our mess if so. This // occurs when a function passes an argument straight through to its tail // call. - for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) { - PHINode *PN = ArgumentPHIs[i]; - + for (PHINode *PN : ArgumentPHIs) { // If the PHI Node is a dynamic constant, replace it with the value it is. if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) { PN->replaceAllUsesWith(PNV); @@ -468,10 +466,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // return value of the call, it must only use things that are defined before // the call, or movable instructions between the call and the instruction // itself. 
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (I->getOperand(i) == CI) - return false; - return true; + return std::find(I->op_begin(), I->op_end(), CI) == I->op_end(); } /// Return true if the specified value is the same when the return would exit diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index a5137e9..72db980 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -626,11 +626,17 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, Clone2->setName(Twine("lpad") + Suffix2); NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2); - // Create a PHI node for the two cloned landingpad instructions. - PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); - PN->addIncoming(Clone1, NewBB1); - PN->addIncoming(Clone2, NewBB2); - LPad->replaceAllUsesWith(PN); + // Create a PHI node for the two cloned landingpad instructions only + // if the original landingpad instruction has some uses. + if (!LPad->use_empty()) { + assert(!LPad->getType()->isTokenTy() && + "Split cannot be applied if LPad is token type. Otherwise an " + "invalid PHINode of token type would be created."); + PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); + PN->addIncoming(Clone1, NewBB1); + PN->addIncoming(Clone2, NewBB2); + LPad->replaceAllUsesWith(PN); + } LPad->eraseFromParent(); } else { // There is no second clone. Just replace the landing pad with the first diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index 854a3b8..6454afb 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -266,27 +266,14 @@ namespace { bool ModuleLevelChanges; const char *NameSuffix; ClonedCodeInfo *CodeInfo; - CloningDirector *Director; - ValueMapTypeRemapper *TypeMapper; - ValueMaterializer *Materializer; public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, ValueToValueMapTy &valueMap, bool moduleLevelChanges, - const char *nameSuffix, ClonedCodeInfo *codeInfo, - CloningDirector *Director) + const char *nameSuffix, ClonedCodeInfo *codeInfo) : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), - CodeInfo(codeInfo), Director(Director) { - // These are optional components. The Director may return null. - if (Director) { - TypeMapper = Director->getTypeRemapper(); - Materializer = Director->getValueMaterializer(); - } else { - TypeMapper = nullptr; - Materializer = nullptr; - } - } + CodeInfo(codeInfo) {} /// The specified block is found to be reachable, clone it and /// anything that it can reach. @@ -332,23 +319,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // loop doesn't include the terminator. for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE; ++II) { - // If the "Director" remaps the instruction, don't clone it. - if (Director) { - CloningDirector::CloningAction Action = - Director->handleInstruction(VMap, &*II, NewBB); - // If the cloning director says stop, we want to stop everything, not - // just break out of the loop (which would cause the terminator to be - // cloned). The cloning director is responsible for inserting a proper - // terminator into the new basic block in this case. 
- if (Action == CloningDirector::StopCloningBB) - return; - // If the cloning director says skip, continue to the next instruction. - // In this case, the cloning director is responsible for mapping the - // skipped instruction to some value that is defined in the new - // basic block. - if (Action == CloningDirector::SkipInstruction) - continue; - } Instruction *NewInst = II->clone(); @@ -356,8 +326,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // nodes for which we defer processing until we update the CFG. if (!isa<PHINode>(NewInst)) { RemapInstruction(NewInst, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer); + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); // If we can simplify this instruction to some other value, simply add // a mapping to that value rather than inserting a new instruction into @@ -397,26 +366,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // Finally, clone over the terminator. const TerminatorInst *OldTI = BB->getTerminator(); bool TerminatorDone = false; - if (Director) { - CloningDirector::CloningAction Action - = Director->handleInstruction(VMap, OldTI, NewBB); - // If the cloning director says stop, we want to stop everything, not - // just break out of the loop (which would cause the terminator to be - // cloned). The cloning director is responsible for inserting a proper - // terminator into the new basic block in this case. - if (Action == CloningDirector::StopCloningBB) - return; - if (Action == CloningDirector::CloneSuccessors) { - // If the director says to skip with a terminate instruction, we still - // need to clone this block's successors. - const TerminatorInst *TI = NewBB->getTerminator(); - for (const BasicBlock *Succ : TI->successors()) - ToClone.push_back(Succ); - return; - } - assert(Action != CloningDirector::SkipInstruction && - "SkipInstruction is not valid for terminators."); - } if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) { if (BI->isConditional()) { // If the condition was a known constant in the callee... @@ -485,19 +434,13 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns, - const char *NameSuffix, - ClonedCodeInfo *CodeInfo, - CloningDirector *Director) { + const char *NameSuffix, + ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; ValueMaterializer *Materializer = nullptr; - if (Director) { - TypeMapper = Director->getTypeRemapper(); - Materializer = Director->getValueMaterializer(); - } - #ifndef NDEBUG // If the cloning starts at the beginning of the function, verify that // the function arguments are mapped. 
@@ -507,7 +450,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
 #endif
 
   PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
-                            NameSuffix, CodeInfo, Director);
+                            NameSuffix, CodeInfo);
   const BasicBlock *StartingBB;
   if (StartingInst)
     StartingBB = StartingInst->getParent();
@@ -731,8 +674,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
                              ClonedCodeInfo *CodeInfo,
                              Instruction *TheCall) {
   CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
-                            ModuleLevelChanges, Returns, NameSuffix, CodeInfo,
-                            nullptr);
+                            ModuleLevelChanges, Returns, NameSuffix, CodeInfo);
 }
 
 /// \brief Remaps instructions in \p Blocks using the mapping in \p VMap.
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index 0e386ac..d2793e5 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
@@ -1051,9 +1052,31 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
     ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
   if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
     ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
-  if (ExtendedArg)
-    Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr,
+  if (ExtendedArg) {
+    // We're now only describing a subset of the variable. The piece we're
+    // describing will always be smaller than the variable size, because
+    // VariableSize == Size of Alloca described by DDI. Since SI stores
+    // to the alloca described by DDI, if its first operand is an extend,
+    // we're guaranteed that before extension, the value was narrower than
+    // the size of the alloca, hence the size of the described variable.
+    SmallVector<uint64_t, 3> NewDIExpr;
+    unsigned PieceOffset = 0;
+    // If this already is a bit piece, we drop the bit piece from the expression
+    // and record the offset.
+    if (DIExpr->isBitPiece()) {
+      NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()-3);
+      PieceOffset = DIExpr->getBitPieceOffset();
+    } else {
+      NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
+    }
+    NewDIExpr.push_back(dwarf::DW_OP_bit_piece);
+    NewDIExpr.push_back(PieceOffset); // Offset
+    const DataLayout &DL = DDI->getModule()->getDataLayout();
+    NewDIExpr.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); // Size
+    Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar,
+                                    Builder.createExpression(NewDIExpr),
                                     DDI->getDebugLoc(), SI);
+  } else
     Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
                                     DDI->getDebugLoc(), SI);
@@ -1407,7 +1430,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB) {
 /// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
 /// if they are in a dead cycle.  Return true if a change was made, false
 /// otherwise.
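+/// (Editor's sketch of the new LVI parameter, hedged: a pass that keeps a
+/// LazyValueInfo cache can now call removeUnreachableBlocks(F, LVI) so each
+/// deleted block is purged via LVI->eraseBlock(); passing nullptr preserves
+/// the old behavior.)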
-bool llvm::removeUnreachableBlocks(Function &F) { +bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { SmallPtrSet<BasicBlock*, 128> Reachable; bool Changed = markAliveBlocks(F, Reachable); @@ -1428,6 +1451,8 @@ bool llvm::removeUnreachableBlocks(Function &F) { ++SI) if (Reachable.count(*SI)) (*SI)->removePredecessor(&*BB); + if (LVI) + LVI->eraseBlock(&*BB); BB->dropAllReferences(); } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 2499b88..eea9237 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -528,7 +528,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, Loop *OuterL = L->getParentLoop(); // Update LoopInfo if the loop is completely removed. if (CompletelyUnroll) - LI->updateUnloop(L);; + LI->markAsRemoved(L); // If we have a pass and a DominatorTree we should re-simplify impacted loops // to ensure subsequent analyses can rely on this form. We want to simplify @@ -542,7 +542,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after - // LoopInfo's been updated by updateUnloop. + // LoopInfo's been updated by markAsRemoved. Loop *LatchLoop = LI->getLoopFor(Latches.back()); if (!OuterL->contains(LatchLoop)) while (OuterL->getParentLoop() != LatchLoop) diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp index e038805..fa958e9 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -599,7 +599,7 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, IRBuilder<>::FastMathFlagGuard FMFG(Builder); FastMathFlags FMF; FMF.setUnsafeAlgebra(); - Builder.SetFastMathFlags(FMF); + Builder.setFastMathFlags(FMF); Value *Cmp; if (RK == MRK_FloatMin || RK == MRK_FloatMax) diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 3bb3fa5..3125a2c 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -141,6 +141,8 @@ class SimplifyCFGOpt { bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); + bool SimplifySingleResume(ResumeInst *RI); + bool SimplifyCommonResume(ResumeInst *RI); bool SimplifyCleanupReturn(CleanupReturnInst *RI); bool SimplifyUnreachable(UnreachableInst *UI); bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); @@ -3239,14 +3241,101 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, } bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { - // If this is a trivial landing pad that just continues unwinding the caught - // exception then zap the landing pad, turning its invokes into calls. + if (isa<PHINode>(RI->getValue())) + return SimplifyCommonResume(RI); + else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) && + RI->getValue() == RI->getParent()->getFirstNonPHI()) + // The resume must unwind the exception that caused control to branch here. + return SimplifySingleResume(RI); + + return false; +} + +// Simplify resume that is shared by several landing pads (phi of landing pad). 
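// In CFG terms: several landing pads each branch straight to one shared block,
// whose phi merges their landingpad values into this resume; for every such
// trivial incoming block, the invokes that unwind to it can become plain calls.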
+bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { + BasicBlock *BB = RI->getParent(); + + // Check that there are no other instructions except for debug intrinsics + // between the phi of landing pads (RI->getValue()) and resume instruction. + BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(), + E = RI->getIterator(); + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) + return false; + + SmallSet<BasicBlock *, 4> TrivialUnwindBlocks; + auto *PhiLPInst = cast<PHINode>(RI->getValue()); + + // Check incoming blocks to see if any of them are trivial. + for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); + Idx != End; Idx++) { + auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx); + auto *IncomingValue = PhiLPInst->getIncomingValue(Idx); + + // If the block has other successors, we cannot delete it because + // it has other dependents. + if (IncomingBB->getUniqueSuccessor() != BB) + continue; + + auto *LandingPad = + dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI()); + // Not the landing pad that caused control to branch here. + if (IncomingValue != LandingPad) + continue; + + bool isTrivial = true; + + I = IncomingBB->getFirstNonPHI()->getIterator(); + E = IncomingBB->getTerminator()->getIterator(); + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) { + isTrivial = false; + break; + } + + if (isTrivial) + TrivialUnwindBlocks.insert(IncomingBB); + } + + // If no trivial unwind blocks, don't do any simplifications. + if (TrivialUnwindBlocks.empty()) return false; + + // Turn all invokes that unwind here into calls. + for (auto *TrivialBB : TrivialUnwindBlocks) { + // Blocks that will be simplified should be removed from the phi node. + // Note there could be multiple edges to the resume block, and we need + // to remove them all. + while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1) + BB->removePredecessor(TrivialBB, true); + + for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB); + PI != PE;) { + BasicBlock *Pred = *PI++; + removeUnwindEdge(Pred); + } + + // In each SimplifyCFG run, only the currently processed block can be erased. + // Otherwise, it would break the iteration of the SimplifyCFG pass. So instead + // of erasing TrivialBB, we only remove the branch to the common resume + // block so that we can later erase the resume block since it has no + // predecessors. + TrivialBB->getTerminator()->eraseFromParent(); + new UnreachableInst(RI->getContext(), TrivialBB); + } + + // Delete the resume block if all its predecessors have been removed. + if (pred_empty(BB)) + BB->eraseFromParent(); + + return !TrivialUnwindBlocks.empty(); +} + +// Simplify resume that is only used by a single (non-phi) landing pad. +bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI()); - if (RI->getValue() != LPInst) - // Not a landing pad, or the resume is not unwinding the exception that - // caused control to branch here. - return false; + assert(RI->getValue() == LPInst && + "Resume must unwind the exception that caused control to branch here"); // Check that there are no other instructions except for debug intrinsics.
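// (i.e. the landing pad must feed the resume directly; the same scan as in
// SimplifyCommonResume above, just without the phi indirection.)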
BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator(); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index dc5fee5..dc07440 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -997,7 +997,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, // Propagate fast-math flags from the existing call to the new call. IRBuilder<>::FastMathFlagGuard Guard(B); - B.SetFastMathFlags(CI->getFastMathFlags()); + B.setFastMathFlags(CI->getFastMathFlags()); // floor((double)floatval) -> (double)floorf(floatval) if (Callee->isIntrinsic()) { @@ -1035,7 +1035,7 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { // Propagate fast-math flags from the existing call to the new call. IRBuilder<>::FastMathFlagGuard Guard(B); - B.SetFastMathFlags(CI->getFastMathFlags()); + B.setFastMathFlags(CI->getFastMathFlags()); // fmin((double)floatval1, (double)floatval2) // -> (double)fminf(floatval1, floatval2) @@ -1127,29 +1127,26 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Callee->getAttributes()); } + // FIXME: Use instruction-level FMF. bool UnsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent()); - // pow(exp(x), y) -> exp(x*y) + // pow(exp(x), y) -> exp(x * y) // pow(exp2(x), y) -> exp2(x * y) - // We enable these only under fast-math. Besides rounding - // differences the transformation changes overflow and - // underflow behavior quite dramatically. + // We enable these only with fast-math. Besides rounding differences, the + // transformation changes overflow and underflow behavior quite dramatically. // Example: x = 1000, y = 0.001. // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). - if (UnsafeFPMath) { - if (auto *OpC = dyn_cast<CallInst>(Op1)) { + auto *OpC = dyn_cast<CallInst>(Op1); + if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) { + LibFunc::Func Func; + Function *OpCCallee = OpC->getCalledFunction(); + if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && + TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) { IRBuilder<>::FastMathFlagGuard Guard(B); - FastMathFlags FMF; - FMF.setUnsafeAlgebra(); - B.SetFastMathFlags(FMF); - - LibFunc::Func Func; - Function *OpCCallee = OpC->getCalledFunction(); - if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && - TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) - return EmitUnaryFloatFnCall( - B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"), - OpCCallee->getName(), B, OpCCallee->getAttributes()); + B.setFastMathFlags(CI->getFastMathFlags()); + Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"); + return EmitUnaryFloatFnCall(FMul, OpCCallee->getName(), B, + OpCCallee->getAttributes()); } } @@ -1167,9 +1164,12 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { LibFunc::fabsl)) { // In -ffast-math, pow(x, 0.5) -> sqrt(x). - if (UnsafeFPMath) + if (CI->hasUnsafeAlgebra()) { + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, Callee->getAttributes()); + } // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). 
// This is faster than calling pow, and still handles negative zero @@ -1328,7 +1328,7 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { FMF.setNoSignedZeros(); FMF.setNoNaNs(); } - B.SetFastMathFlags(FMF); + B.setFastMathFlags(FMF); // We have a relaxed floating-point environment. We can ignore NaN-handling // and transform to a compare and select. We do not have to consider errno or @@ -1354,11 +1354,13 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { !FT->getParamType(0)->isFloatingPointTy()) return Ret; - if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + if (!CI->hasUnsafeAlgebra()) return Ret; Value *Op1 = CI->getArgOperand(0); auto *OpC = dyn_cast<CallInst>(Op1); - if (!OpC) + + // The earlier call must also be unsafe in order to do these transforms. + if (!OpC || !OpC->hasUnsafeAlgebra()) return Ret; // log(pow(x,y)) -> y*log(x) @@ -1369,7 +1371,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { IRBuilder<>::FastMathFlagGuard Guard(B); FastMathFlags FMF; FMF.setUnsafeAlgebra(); - B.SetFastMathFlags(FMF); + B.setFastMathFlags(FMF); LibFunc::Func Func; Function *F = OpC->getCalledFunction(); @@ -1397,66 +1399,67 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); - if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + + if (!CI->hasUnsafeAlgebra()) return Ret; - Value *Op = CI->getArgOperand(0); - if (Instruction *I = dyn_cast<Instruction>(Op)) { - if (I->getOpcode() == Instruction::FMul && I->hasUnsafeAlgebra()) { - // We're looking for a repeated factor in a multiplication tree, - // so we can do this fold: sqrt(x * x) -> fabs(x); - // or this fold: sqrt(x * x * y) -> fabs(x) * sqrt(y). - Value *Op0 = I->getOperand(0); - Value *Op1 = I->getOperand(1); - Value *RepeatOp = nullptr; - Value *OtherOp = nullptr; - if (Op0 == Op1) { - // Simple match: the operands of the multiply are identical. - RepeatOp = Op0; - } else { - // Look for a more complicated pattern: one of the operands is itself - // a multiply, so search for a common factor in that multiply. - // Note: We don't bother looking any deeper than this first level or for - // variations of this pattern because instcombine's visitFMUL and/or the - // reassociation pass should give us this form. - Value *OtherMul0, *OtherMul1; - if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { - // Pattern: sqrt((x * y) * z) - if (OtherMul0 == OtherMul1) { - // Matched: sqrt((x * x) * z) - RepeatOp = OtherMul0; - OtherOp = Op1; - } - } - } - if (RepeatOp) { - // Fast math flags for any created instructions should match the sqrt - // and multiply. - // FIXME: We're not checking the sqrt because it doesn't have - // fast-math-flags (see earlier comment). - IRBuilder<>::FastMathFlagGuard Guard(B); - B.SetFastMathFlags(I->getFastMathFlags()); - // If we found a repeated factor, hoist it out of the square root and - // replace it with the fabs of that factor. - Module *M = Callee->getParent(); - Type *ArgType = Op->getType(); - Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); - Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs"); - if (OtherOp) { - // If we found a non-repeated factor, we still need to get its square - // root. We then multiply that by the value that was simplified out - // of the square root calculation. 
- Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); - Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); - return B.CreateFMul(FabsCall, SqrtCall); - } - return FabsCall; + Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0)); + if (!I || I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) + return Ret; + + // We're looking for a repeated factor in a multiplication tree, + // so we can do this fold: sqrt(x * x) -> fabs(x); + // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y). + Value *Op0 = I->getOperand(0); + Value *Op1 = I->getOperand(1); + Value *RepeatOp = nullptr; + Value *OtherOp = nullptr; + if (Op0 == Op1) { + // Simple match: the operands of the multiply are identical. + RepeatOp = Op0; + } else { + // Look for a more complicated pattern: one of the operands is itself + // a multiply, so search for a common factor in that multiply. + // Note: We don't bother looking any deeper than this first level or for + // variations of this pattern because instcombine's visitFMUL and/or the + // reassociation pass should give us this form. + Value *OtherMul0, *OtherMul1; + if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { + // Pattern: sqrt((x * y) * z) + if (OtherMul0 == OtherMul1 && + cast<Instruction>(Op0)->hasUnsafeAlgebra()) { + // Matched: sqrt((x * x) * z) + RepeatOp = OtherMul0; + OtherOp = Op1; } } } - return Ret; -} + if (!RepeatOp) + return Ret; + // Fast math flags for any created instructions should match the sqrt + // and multiply. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(I->getFastMathFlags()); + + // If we found a repeated factor, hoist it out of the square root and + // replace it with the fabs of that factor. + Module *M = Callee->getParent(); + Type *ArgType = I->getType(); + Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); + Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs"); + if (OtherOp) { + // If we found a non-repeated factor, we still need to get its square + // root. We then multiply that by the value that was simplified out + // of the square root calculation. + Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); + Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); + return B.CreateFMul(FabsCall, SqrtCall); + } + return FabsCall; +} + +// TODO: Generalize to handle any trig function and its inverse. Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; @@ -1471,13 +1474,15 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { !FT->getParamType(0)->isFloatingPointTy()) return Ret; - if (!canUseUnsafeFPMath(CI->getParent()->getParent())) - return Ret; Value *Op1 = CI->getArgOperand(0); auto *OpC = dyn_cast<CallInst>(Op1); if (!OpC) return Ret; + // Both calls must allow unsafe optimizations in order to remove them. 
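// tan(atan(x)) == x only holds exactly in real arithmetic; in IEEE floating
// point the round trip can perturb the value, so the fold needs fast-math
// permission on both call sites.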
+ if (!CI->hasUnsafeAlgebra() || !OpC->hasUnsafeAlgebra()) + return Ret; + // tan(atan(x)) -> x // tanf(atanf(x)) -> x // tanl(atanl(x)) -> x diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index 2e361d3..f47ddb9 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -222,8 +222,17 @@ static void resolveCycles(Metadata *MD, bool AllowTemps) { if (auto *N = dyn_cast_or_null<MDNode>(MD)) { if (AllowTemps && N->isTemporary()) return; - if (!N->isResolved()) - N->resolveCycles(AllowTemps); + if (!N->isResolved()) { + if (AllowTemps) + // Note that this will drop RAUW support on any temporaries, which + // blocks uniquing. If this ends up being an issue, in the future + // we can experiment with delaying resolving these nodes until + // after metadata is fully materialized (i.e. when linking metadata + // as a postpass after function importing). + N->resolveNonTemporaries(); + else + N->resolveCycles(); + } } } diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9ed44d1..27d3337 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -222,7 +222,7 @@ static void propagateIRFlags(Value *I, ArrayRef<Value *> VL) { } } } - + /// \returns \p I after propagating metadata from \p VL. static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) { Instruction *I0 = cast<Instruction>(VL[0]); @@ -506,7 +506,7 @@ private: } return Last; } - + /// -- Vectorization State -- /// Holds all of the tree entries. std::vector<TreeEntry> VectorizableTree; @@ -884,7 +884,7 @@ private: /// The current size of the scheduling region. int ScheduleRegionSize; - + /// The maximum size allowed for the scheduling region. int ScheduleRegionSizeLimit; @@ -1089,7 +1089,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { newTreeEntry(VL, false); return; } - + // Check that every instructions appears once in this bundle. for (unsigned i = 0, e = VL.size(); i < e; ++i) for (unsigned j = i+1; j < e; ++j) @@ -1711,7 +1711,7 @@ int BoUpSLP::getSpillCost() { int Cost = 0; SmallPtrSet<Instruction*, 4> LiveValues; - Instruction *PrevInst = nullptr; + Instruction *PrevInst = nullptr; for (unsigned N = 0; N < VectorizableTree.size(); ++N) { Instruction *Inst = dyn_cast<Instruction>(VectorizableTree[N].Scalars[0]); @@ -1736,7 +1736,7 @@ int BoUpSLP::getSpillCost() { for (auto &J : PrevInst->operands()) { if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J)) LiveValues.insert(cast<Instruction>(&*J)); - } + } // Now find the sequence of instructions between PrevInst and Inst. 
BasicBlock::reverse_iterator InstIt(Inst->getIterator()), @@ -1780,30 +1780,29 @@ int BoUpSLP::getTreeCost() { unsigned BundleWidth = VectorizableTree[0].Scalars.size(); - for (unsigned i = 0, e = VectorizableTree.size(); i != e; ++i) { - int C = getEntryCost(&VectorizableTree[i]); + for (TreeEntry &TE : VectorizableTree) { + int C = getEntryCost(&TE); DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with " - << *VectorizableTree[i].Scalars[0] << " .\n"); + << *TE.Scalars[0] << " .\n"); Cost += C; } SmallSet<Value *, 16> ExtractCostCalculated; int ExtractCost = 0; - for (UserList::iterator I = ExternalUses.begin(), E = ExternalUses.end(); - I != E; ++I) { + for (ExternalUser &EU : ExternalUses) { // We only add extract cost once for the same scalar. - if (!ExtractCostCalculated.insert(I->Scalar).second) + if (!ExtractCostCalculated.insert(EU.Scalar).second) continue; // Uses by ephemeral values are free (because the ephemeral value will be // removed prior to code generation, and so the extraction will be // removed as well). - if (EphValues.count(I->User)) + if (EphValues.count(EU.User)) continue; - VectorType *VecTy = VectorType::get(I->Scalar->getType(), BundleWidth); + VectorType *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth); ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, - I->Lane); + EU.Lane); } Cost += getSpillCost(); @@ -2551,7 +2550,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Value *BoUpSLP::vectorizeTree() { - + // All blocks must be scheduled before any instructions are inserted. for (auto &BSIter : BlocksSchedules) { scheduleBlock(BSIter.second.get()); } @@ -3072,10 +3071,10 @@ void BoUpSLP::BlockScheduling::resetSchedule() { } void BoUpSLP::scheduleBlock(BlockScheduling *BS) { - + if (!BS->ScheduleStart) return; - + DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n"); BS->resetSchedule(); @@ -3590,7 +3589,7 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { /// \param NumEltsToRdx The number of elements that should be reduced in the /// vector. /// \param IsPairwise Whether the reduction is a pairwise or splitting -/// reduction. A pairwise reduction will generate a mask of +/// reduction. A pairwise reduction will generate a mask of /// <0,2,...> or <1,3,..> while a splitting reduction will generate /// <2,3, undef,undef> for a vector of 4 and NumElts = 2. /// \param IsLeft True will generate a mask of even elements, odd otherwise. @@ -3773,7 +3772,7 @@ public: IRBuilder<> Builder(ReductionRoot); FastMathFlags Unsafe; Unsafe.setUnsafeAlgebra(); - Builder.SetFastMathFlags(Unsafe); + Builder.setFastMathFlags(Unsafe); unsigned i = 0; for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) { @@ -4018,9 +4017,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { // Collect the incoming values from the PHIs.
Incoming.clear(); - for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie; - ++instr) { - PHINode *P = dyn_cast<PHINode>(instr); + for (Instruction &I : *BB) { + PHINode *P = dyn_cast<PHINode>(&I); if (!P) break; diff --git a/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h b/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h index b66009e..abf9294 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h +++ b/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h @@ -131,6 +131,7 @@ class ASTContext : public RefCountedBase<ASTContext> { mutable llvm::FoldingSet<AutoType> AutoTypes; mutable llvm::FoldingSet<AtomicType> AtomicTypes; llvm::FoldingSet<AttributedType> AttributedTypes; + mutable llvm::FoldingSet<PipeType> PipeTypes; mutable llvm::FoldingSet<QualifiedTemplateName> QualifiedTemplateNames; mutable llvm::FoldingSet<DependentTemplateName> DependentTemplateNames; @@ -1079,6 +1080,9 @@ public: /// blocks. QualType getBlockDescriptorType() const; + /// \brief Return pipe type for the specified type. + QualType getPipeType(QualType T) const; + /// Gets the struct used to keep track of the extended descriptor for /// pointer to blocks. QualType getBlockDescriptorExtendedType() const; @@ -2279,9 +2283,13 @@ public: /// \brief Make an APSInt of the appropriate width and signedness for the /// given \p Value and integer \p Type. llvm::APSInt MakeIntValue(uint64_t Value, QualType Type) const { - llvm::APSInt Res(getIntWidth(Type), - !Type->isSignedIntegerOrEnumerationType()); + // If Type is a signed integer type larger than 64 bits, we need to be sure + // to sign extend Res appropriately. + llvm::APSInt Res(64, !Type->isSignedIntegerOrEnumerationType()); Res = Value; + unsigned Width = getIntWidth(Type); + if (Width != Res.getBitWidth()) + return Res.extOrTrunc(Width); return Res; } diff --git a/contrib/llvm/tools/clang/include/clang/AST/ASTMutationListener.h b/contrib/llvm/tools/clang/include/clang/AST/ASTMutationListener.h index 3ff392d..cf3b55d 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/ASTMutationListener.h +++ b/contrib/llvm/tools/clang/include/clang/AST/ASTMutationListener.h @@ -29,6 +29,7 @@ namespace clang { class ObjCContainerDecl; class ObjCInterfaceDecl; class ObjCPropertyDecl; + class ParmVarDecl; class QualType; class RecordDecl; class TagDecl; @@ -88,6 +89,9 @@ public: /// \brief A function template's definition was instantiated. virtual void FunctionDefinitionInstantiated(const FunctionDecl *D) {} + /// \brief A default argument was instantiated. + virtual void DefaultArgumentInstantiated(const ParmVarDecl *D) {} + /// \brief A new objc category class was added for an interface. 
virtual void AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD, const ObjCInterfaceDecl *IFD) {} diff --git a/contrib/llvm/tools/clang/include/clang/AST/BuiltinTypes.def b/contrib/llvm/tools/clang/include/clang/AST/BuiltinTypes.def index 85e237a..a08a683 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/BuiltinTypes.def +++ b/contrib/llvm/tools/clang/include/clang/AST/BuiltinTypes.def @@ -1,4 +1,4 @@ -//===-- BuiltinTypeNodes.def - Metadata about BuiltinTypes ------*- C++ -*-===// +//===-- BuiltinTypes.def - Metadata about BuiltinTypes ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/tools/clang/include/clang/AST/Decl.h b/contrib/llvm/tools/clang/include/clang/AST/Decl.h index 046ce70..029c118 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/Decl.h +++ b/contrib/llvm/tools/clang/include/clang/AST/Decl.h @@ -2440,10 +2440,9 @@ class IndirectFieldDecl : public ValueDecl, NamedDecl **Chaining; unsigned ChainingSize; - IndirectFieldDecl(DeclContext *DC, SourceLocation L, + IndirectFieldDecl(ASTContext &C, DeclContext *DC, SourceLocation L, DeclarationName N, QualType T, - NamedDecl **CH, unsigned CHS) - : ValueDecl(IndirectField, DC, L, N, T), Chaining(CH), ChainingSize(CHS) {} + NamedDecl **CH, unsigned CHS); public: static IndirectFieldDecl *Create(ASTContext &C, DeclContext *DC, diff --git a/contrib/llvm/tools/clang/include/clang/AST/DeclBase.h b/contrib/llvm/tools/clang/include/clang/AST/DeclBase.h index 05b2a12..2d6e84a 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/DeclBase.h +++ b/contrib/llvm/tools/clang/include/clang/AST/DeclBase.h @@ -113,6 +113,9 @@ public: /// Tags, declared with 'struct foo;' and referenced with /// 'struct foo'. All tags are also types. This is what /// elaborated-type-specifiers look for in C. + /// This also contains names that conflict with tags in the + /// same scope but that are otherwise ordinary names (non-type + /// template parameters and indirect field declarations). IDNS_Tag = 0x0002, /// Types, declared with 'struct foo', typedefs, etc. @@ -131,7 +134,7 @@ public: IDNS_Namespace = 0x0010, /// Ordinary names. In C, everything that's not a label, tag, - /// or member ends up here. + /// member, or function-local extern ends up here. IDNS_Ordinary = 0x0020, /// Objective C \@protocol. @@ -160,7 +163,9 @@ public: /// This declaration is a function-local extern declaration of a /// variable or function. This may also be IDNS_Ordinary if it - /// has been declared outside any function. + /// has been declared outside any function. These act mostly like + /// invisible friend declarations, but are also visible to unqualified + /// lookup within the scope of the declaring function. 
IDNS_LocalExtern = 0x0800 }; diff --git a/contrib/llvm/tools/clang/include/clang/AST/Expr.h b/contrib/llvm/tools/clang/include/clang/AST/Expr.h index 095dd6a..38733ee 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/Expr.h +++ b/contrib/llvm/tools/clang/include/clang/AST/Expr.h @@ -1292,6 +1292,7 @@ public: enum CharacterKind { Ascii, Wide, + UTF8, UTF16, UTF32 }; diff --git a/contrib/llvm/tools/clang/include/clang/AST/ExprCXX.h b/contrib/llvm/tools/clang/include/clang/AST/ExprCXX.h index 0608aba..6821274 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/ExprCXX.h +++ b/contrib/llvm/tools/clang/include/clang/AST/ExprCXX.h @@ -951,15 +951,9 @@ public: /// This wraps up a function call argument that was created from the /// corresponding parameter's default argument, when the call did not /// explicitly supply arguments for all of the parameters. -class CXXDefaultArgExpr final - : public Expr, - private llvm::TrailingObjects<CXXDefaultArgExpr, Expr *> { +class CXXDefaultArgExpr final : public Expr { /// \brief The parameter whose default is being used. - /// - /// When the bit is set, the subexpression is stored after the - /// CXXDefaultArgExpr itself. When the bit is clear, the parameter's - /// actual default expression is the subexpression. - llvm::PointerIntPair<ParmVarDecl *, 1, bool> Param; + ParmVarDecl *Param; /// \brief The location where the default argument expression was used. SourceLocation Loc; @@ -971,16 +965,7 @@ class CXXDefaultArgExpr final : param->getDefaultArg()->getType(), param->getDefaultArg()->getValueKind(), param->getDefaultArg()->getObjectKind(), false, false, false, false), - Param(param, false), Loc(Loc) { } - - CXXDefaultArgExpr(StmtClass SC, SourceLocation Loc, ParmVarDecl *param, - Expr *SubExpr) - : Expr(SC, SubExpr->getType(), - SubExpr->getValueKind(), SubExpr->getObjectKind(), - false, false, false, false), - Param(param, true), Loc(Loc) { - *getTrailingObjects<Expr *>() = SubExpr; - } + Param(param), Loc(Loc) { } public: CXXDefaultArgExpr(EmptyShell Empty) : Expr(CXXDefaultArgExprClass, Empty) {} @@ -992,24 +977,15 @@ public: return new (C) CXXDefaultArgExpr(CXXDefaultArgExprClass, Loc, Param); } - // \p Param is the parameter whose default argument is used by this - // expression, and \p SubExpr is the expression that will actually be used. - static CXXDefaultArgExpr *Create(const ASTContext &C, SourceLocation Loc, - ParmVarDecl *Param, Expr *SubExpr); - // Retrieve the parameter that the argument was created from. - const ParmVarDecl *getParam() const { return Param.getPointer(); } - ParmVarDecl *getParam() { return Param.getPointer(); } + const ParmVarDecl *getParam() const { return Param; } + ParmVarDecl *getParam() { return Param; } // Retrieve the actual argument to the function call. 
const Expr *getExpr() const { - if (Param.getInt()) - return *getTrailingObjects<Expr *>(); return getParam()->getDefaultArg(); } Expr *getExpr() { - if (Param.getInt()) - return *getTrailingObjects<Expr *>(); return getParam()->getDefaultArg(); } @@ -1033,7 +1009,6 @@ public: return child_range(child_iterator(), child_iterator()); } - friend TrailingObjects; friend class ASTStmtReader; friend class ASTStmtWriter; }; diff --git a/contrib/llvm/tools/clang/include/clang/AST/OperationKinds.h b/contrib/llvm/tools/clang/include/clang/AST/OperationKinds.h index 2235c10..102bbc2 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/OperationKinds.h +++ b/contrib/llvm/tools/clang/include/clang/AST/OperationKinds.h @@ -185,7 +185,11 @@ enum CastKind { /// CK_FloatingToBoolean - Floating point to boolean. /// (bool) f CK_FloatingToBoolean, - + + /// CK_BooleanToSignedIntegral - Convert a boolean to -1 or 0 for true and + /// false, respectively. + CK_BooleanToSignedIntegral, + /// CK_FloatingCast - Casting between floating types of different size. /// (double) f /// (float) ld diff --git a/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h b/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h index e6f7583..0c25a45 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h @@ -978,6 +978,8 @@ DEF_TRAVERSE_TYPE(ObjCObjectPointerType, DEF_TRAVERSE_TYPE(AtomicType, { TRY_TO(TraverseType(T->getValueType())); }) +DEF_TRAVERSE_TYPE(PipeType, { TRY_TO(TraverseType(T->getElementType())); }) + #undef DEF_TRAVERSE_TYPE // ----------------- TypeLoc traversal ----------------- @@ -1206,6 +1208,8 @@ DEF_TRAVERSE_TYPELOC(ObjCObjectPointerType, DEF_TRAVERSE_TYPELOC(AtomicType, { TRY_TO(TraverseTypeLoc(TL.getValueLoc())); }) +DEF_TRAVERSE_TYPELOC(PipeType, { TRY_TO(TraverseTypeLoc(TL.getValueLoc())); }) + #undef DEF_TRAVERSE_TYPELOC // ----------------- Decl traversal ----------------- diff --git a/contrib/llvm/tools/clang/include/clang/AST/Stmt.h b/contrib/llvm/tools/clang/include/clang/AST/Stmt.h index e48b7dc..d3950e9 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/Stmt.h +++ b/contrib/llvm/tools/clang/include/clang/AST/Stmt.h @@ -130,7 +130,7 @@ protected: friend class CharacterLiteral; unsigned : NumExprBits; - unsigned Kind : 2; + unsigned Kind : 3; }; enum APFloatSemantics { diff --git a/contrib/llvm/tools/clang/include/clang/AST/Type.h b/contrib/llvm/tools/clang/include/clang/AST/Type.h index 0c08130..d63b2c4 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/Type.h +++ b/contrib/llvm/tools/clang/include/clang/AST/Type.h @@ -1721,6 +1721,7 @@ public: bool isNDRangeT() const; // OpenCL ndrange_t bool isReserveIDT() const; // OpenCL reserve_id_t + bool isPipeType() const; // OpenCL pipe type bool isOpenCLSpecificType() const; // Any OpenCL specific type /// Determines if this type, which must satisfy @@ -5015,6 +5016,41 @@ class AtomicType : public Type, public llvm::FoldingSetNode { } }; +/// PipeType - OpenCL 2.0 pipe type. +class PipeType : public Type, public llvm::FoldingSetNode { + QualType ElementType; + + PipeType(QualType elemType, QualType CanonicalPtr) : + Type(Pipe, CanonicalPtr, elemType->isDependentType(), + elemType->isInstantiationDependentType(), + elemType->isVariablyModifiedType(), + elemType->containsUnexpandedParameterPack()), + ElementType(elemType) {} + friend class ASTContext; // ASTContext creates these.
+ +public: + + QualType getElementType() const { return ElementType; } + + bool isSugared() const { return false; } + + QualType desugar() const { return QualType(this, 0); } + + void Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, getElementType()); + } + + static void Profile(llvm::FoldingSetNodeID &ID, QualType T) { + ID.AddPointer(T.getAsOpaquePtr()); + } + + + static bool classof(const Type *T) { + return T->getTypeClass() == Pipe; + } + +}; + /// A qualifier set is used to build a set of qualifiers. class QualifierCollector : public Qualifiers { public: @@ -5461,9 +5497,13 @@ inline bool Type::isImageType() const { isImage1dBufferT(); } +inline bool Type::isPipeType() const { + return isa<PipeType>(CanonicalType); +} + inline bool Type::isOpenCLSpecificType() const { return isSamplerT() || isEventT() || isImageType() || isClkEventT() || - isQueueT() || isNDRangeT() || isReserveIDT(); + isQueueT() || isNDRangeT() || isReserveIDT() || isPipeType(); } inline bool Type::isTemplateTypeParmType() const { diff --git a/contrib/llvm/tools/clang/include/clang/AST/TypeLoc.h b/contrib/llvm/tools/clang/include/clang/AST/TypeLoc.h index 26feda5..29035a4 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/TypeLoc.h +++ b/contrib/llvm/tools/clang/include/clang/AST/TypeLoc.h @@ -2033,7 +2033,26 @@ public: } }; +struct PipeTypeLocInfo { + SourceLocation KWLoc; +}; + +class PipeTypeLoc : public ConcreteTypeLoc<UnqualTypeLoc, PipeTypeLoc, PipeType, + PipeTypeLocInfo> { +public: + TypeLoc getValueLoc() const { return this->getInnerTypeLoc(); } + + SourceRange getLocalSourceRange() const { return SourceRange(getKWLoc()); } + + SourceLocation getKWLoc() const { return this->getLocalData()->KWLoc; } + void setKWLoc(SourceLocation Loc) { this->getLocalData()->KWLoc = Loc; } + void initializeLocal(ASTContext &Context, SourceLocation Loc) { + setKWLoc(Loc); + } + + QualType getInnerType() const { return this->getTypePtr()->getElementType(); } +}; } #endif diff --git a/contrib/llvm/tools/clang/include/clang/AST/TypeNodes.def b/contrib/llvm/tools/clang/include/clang/AST/TypeNodes.def index 2549f0b..8caf102 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/TypeNodes.def +++ b/contrib/llvm/tools/clang/include/clang/AST/TypeNodes.def @@ -104,6 +104,7 @@ NON_CANONICAL_UNLESS_DEPENDENT_TYPE(PackExpansion, Type) TYPE(ObjCObject, Type) TYPE(ObjCInterface, ObjCObjectType) TYPE(ObjCObjectPointer, Type) +TYPE(Pipe, Type) TYPE(Atomic, Type) #ifdef LAST_TYPE diff --git a/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchersInternal.h b/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchersInternal.h index d499091..1d1d795 100644 --- a/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchersInternal.h +++ b/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchersInternal.h @@ -560,10 +560,10 @@ bool matchesFirstInPointerRange(const MatcherT &Matcher, IteratorT Start, // Metafunction to determine if type T has a member called // getDecl. -#if defined(_MSC_VER) && (_MSC_VER < 1900) && !defined(__clang__) -// For old versions of MSVC, we use a weird nonstandard __if_exists -// statement, since before MSVC2015, it was not standards-conformant -// enough to compile the usual code below. +#if defined(_MSC_VER) && !defined(__clang__) +// For MSVC, we use a weird nonstandard __if_exists statement, as it +// is not standards-conformant enough to properly compile the standard +// code below. 
(At least up through MSVC 2015, this workaround is required.) template <typename T> struct has_getDecl { __if_exists(T::getDecl) { enum { value = 1 }; diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td index ce270bf..b04498f 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -22,6 +22,7 @@ def err_drv_unknown_stdin_type_clang_cl : Error< def err_drv_unknown_language : Error<"language not recognized: '%0'">; def err_drv_invalid_arch_name : Error< "invalid arch name '%0'">; +def err_drv_cuda_bad_gpu_arch : Error<"Unsupported CUDA gpu architecture: %0">; def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; def err_drv_invalid_linker_name : Error< diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td index 8e5f57d..2e4e57b 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td @@ -616,6 +616,7 @@ def Most : DiagGroup<"most", [ CharSubscript, Comment, DeleteNonVirtualDtor, + ForLoopAnalysis, Format, Implicit, InfiniteRecursion, diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticIDs.h b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticIDs.h index a675dfa..312b71f 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticIDs.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticIDs.h @@ -29,7 +29,7 @@ namespace clang { enum { DIAG_START_COMMON = 0, DIAG_START_DRIVER = DIAG_START_COMMON + 300, - DIAG_START_FRONTEND = DIAG_START_DRIVER + 100, + DIAG_START_FRONTEND = DIAG_START_DRIVER + 200, DIAG_START_SERIALIZATION = DIAG_START_FRONTEND + 100, DIAG_START_LEX = DIAG_START_SERIALIZATION + 120, DIAG_START_PARSE = DIAG_START_LEX + 300, diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticLexKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticLexKinds.td index ed6ff20..2fc9664 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -490,6 +490,8 @@ def warn_pragma_diagnostic_unknown_warning : // - #pragma __debug def warn_pragma_debug_unexpected_command : Warning< "unexpected debug command '%0'">, InGroup<IgnoredPragmas>; +def warn_pragma_debug_missing_argument : Warning< + "missing argument to debug command '%0'">, InGroup<IgnoredPragmas>; def err_defined_macro_name : Error<"'defined' cannot be used as a macro name">; def err_paste_at_start : Error< diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 59f5095..6ba482c 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1,4 +1,3 @@ - //==--- DiagnosticSemaKinds.td - libsema diagnostics ----------------------===// // // The LLVM Compiler Infrastructure // @@ -7643,6 +7642,10 @@ def err_wrong_sampler_addressspace: Error< "sampler type cannot be used with the __local and __global address space qualifiers">; def err_opencl_global_invalid_addr_space : Error< "program scope variable must reside in %0 address space">; +def 
err_missing_actual_pipe_type : Error< + "missing actual type specifier for pipe">; +def err_reference_pipe_type : Error< + "pipe packet types cannot be of reference type">; def err_opencl_no_main : Error<"%select{function|kernel}0 cannot be called 'main'">; def err_opencl_kernel_attr : Error<"attribute %0 can only be applied to a kernel function">; diff --git a/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h b/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h index 1d59d64..e284171 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h @@ -36,6 +36,11 @@ namespace clang { TSS_unsigned }; + enum TypeSpecifiersPipe { + TSP_unspecified, + TSP_pipe + }; + /// \brief Specifies the kind of type. enum TypeSpecifierType { TST_unspecified, diff --git a/contrib/llvm/tools/clang/include/clang/Basic/TokenKinds.def b/contrib/llvm/tools/clang/include/clang/Basic/TokenKinds.def index 9252d99..0269451 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/TokenKinds.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/TokenKinds.def @@ -519,6 +519,8 @@ KEYWORD(vec_step , KEYOPENCL|KEYALTIVEC|KEYZVECTOR) // OpenMP Type Traits KEYWORD(__builtin_omp_required_simd_align, KEYALL) +KEYWORD(pipe , KEYOPENCL) + // Borland Extensions. KEYWORD(__pascal , KEYALL) @@ -697,6 +699,11 @@ ANNOTATION(pragma_parser_crash) // handles them. ANNOTATION(pragma_captured) +// Annotation for #pragma clang __debug dump... +// The lexer produces these so that the parser and semantic analysis can +// look up and dump the operand. +ANNOTATION(pragma_dump) + // Annotation for #pragma ms_struct... // The lexer produces these so that they only take effect when the parser // handles them. diff --git a/contrib/llvm/tools/clang/include/clang/Basic/VirtualFileSystem.h b/contrib/llvm/tools/clang/include/clang/Basic/VirtualFileSystem.h index 1df4947..bab88c9 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/VirtualFileSystem.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/VirtualFileSystem.h @@ -299,10 +299,7 @@ public: llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override { return WorkingDirectory; } - std::error_code setCurrentWorkingDirectory(const Twine &Path) override { - WorkingDirectory = Path.str(); - return std::error_code(); - } + std::error_code setCurrentWorkingDirectory(const Twine &Path) override; }; /// \brief Get a globally unique ID for a virtual file or directory.
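Taken together, the pipe changes add one new node to the type system, a pipe keyword gated on KEYOPENCL, and the two diagnostics above. A minimal sketch of how the new AST hooks compose (makeIntPipe is an illustrative helper, not part of the patch; assumes the relevant clang AST headers, <cassert>, and an ASTContext from the caller):

  // Sketch: build and inspect the AST type for 'pipe int' using the new
  // getPipeType factory, isPipeType predicate, and PipeType accessor.
  clang::QualType makeIntPipe(clang::ASTContext &Ctx) {
    clang::QualType PipeTy = Ctx.getPipeType(Ctx.IntTy);
    assert(PipeTy->isPipeType());
    assert(PipeTy->getAs<clang::PipeType>()->getElementType() == Ctx.IntTy);
    return PipeTy;
  }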
diff --git a/contrib/llvm/tools/clang/include/clang/CodeGen/BackendUtil.h b/contrib/llvm/tools/clang/include/clang/CodeGen/BackendUtil.h index ba5dc39..d375a78 100644 --- a/contrib/llvm/tools/clang/include/clang/CodeGen/BackendUtil.h +++ b/contrib/llvm/tools/clang/include/clang/CodeGen/BackendUtil.h @@ -33,12 +33,10 @@ namespace clang { Backend_EmitObj ///< Emit native object files }; - void - EmitBackendOutput(DiagnosticsEngine &Diags, const CodeGenOptions &CGOpts, - const TargetOptions &TOpts, const LangOptions &LOpts, - StringRef TDesc, llvm::Module *M, BackendAction Action, - raw_pwrite_stream *OS, - std::unique_ptr<llvm::FunctionInfoIndex> Index = nullptr); + void EmitBackendOutput(DiagnosticsEngine &Diags, const CodeGenOptions &CGOpts, + const TargetOptions &TOpts, const LangOptions &LOpts, + StringRef TDesc, llvm::Module *M, BackendAction Action, + raw_pwrite_stream *OS); } #endif diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Action.h b/contrib/llvm/tools/clang/include/clang/Driver/Action.h index fc31d4b..c5b0f47 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Action.h +++ b/contrib/llvm/tools/clang/include/clang/Driver/Action.h @@ -15,6 +15,9 @@ #include "llvm/ADT/SmallVector.h" namespace llvm { + +class StringRef; + namespace opt { class Arg; } @@ -32,6 +35,9 @@ namespace driver { /// single primary output, at least in terms of controlling the /// compilation. Actions can produce auxiliary files, but can only /// produce a single output to feed into subsequent actions. +/// +/// Actions are usually owned by a Compilation, which creates new +/// actions via MakeAction(). class Action { public: typedef ActionList::size_type size_type; @@ -70,27 +76,20 @@ private: ActionList Inputs; - unsigned OwnsInputs : 1; - protected: - Action(ActionClass Kind, types::ID Type) - : Kind(Kind), Type(Type), OwnsInputs(true) {} - Action(ActionClass Kind, std::unique_ptr<Action> Input, types::ID Type) - : Kind(Kind), Type(Type), Inputs(1, Input.release()), OwnsInputs(true) { - } - Action(ActionClass Kind, std::unique_ptr<Action> Input) - : Kind(Kind), Type(Input->getType()), Inputs(1, Input.release()), - OwnsInputs(true) {} + Action(ActionClass Kind, types::ID Type) : Action(Kind, ActionList(), Type) {} + Action(ActionClass Kind, Action *Input, types::ID Type) + : Action(Kind, ActionList({Input}), Type) {} + Action(ActionClass Kind, Action *Input) + : Action(Kind, ActionList({Input}), Input->getType()) {} Action(ActionClass Kind, const ActionList &Inputs, types::ID Type) - : Kind(Kind), Type(Type), Inputs(Inputs), OwnsInputs(true) {} + : Kind(Kind), Type(Type), Inputs(Inputs) {} + public: virtual ~Action(); const char *getClassName() const { return Action::getClassName(getKind()); } - bool getOwnsInputs() { return OwnsInputs; } - void setOwnsInputs(bool Value) { OwnsInputs = Value; } - ActionClass getKind() const { return Kind; } types::ID getType() const { return Type; } @@ -126,7 +125,7 @@ class BindArchAction : public Action { const char *ArchName; public: - BindArchAction(std::unique_ptr<Action> Input, const char *ArchName); + BindArchAction(Action *Input, const char *ArchName); const char *getArchName() const { return ArchName; } @@ -137,19 +136,24 @@ public: class CudaDeviceAction : public Action { virtual void anchor(); - /// GPU architecture to bind -- e.g 'sm_35'. + /// GPU architecture to bind. Always of the form /sm_\d+/. 
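/// (e.g. "sm_35", for which getComputeArchName() below yields the matching
/// "compute_35" virtual architecture name)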
const char *GpuArchName; /// True when action results are not consumed by the host action (e.g when /// -fsyntax-only or --cuda-device-only options are used). bool AtTopLevel; public: - CudaDeviceAction(std::unique_ptr<Action> Input, const char *ArchName, - bool AtTopLevel); + CudaDeviceAction(Action *Input, const char *ArchName, bool AtTopLevel); const char *getGpuArchName() const { return GpuArchName; } + + /// Gets the compute_XX that corresponds to getGpuArchName(). + const char *getComputeArchName() const; + bool isAtTopLevel() const { return AtTopLevel; } + static bool IsValidGpuArchName(llvm::StringRef ArchName); + static bool classof(const Action *A) { return A->getKind() == CudaDeviceClass; } @@ -160,9 +164,7 @@ class CudaHostAction : public Action { ActionList DeviceActions; public: - CudaHostAction(std::unique_ptr<Action> Input, - const ActionList &DeviceActions); - ~CudaHostAction() override; + CudaHostAction(Action *Input, const ActionList &DeviceActions); const ActionList &getDeviceActions() const { return DeviceActions; } @@ -172,7 +174,7 @@ public: class JobAction : public Action { virtual void anchor(); protected: - JobAction(ActionClass Kind, std::unique_ptr<Action> Input, types::ID Type); + JobAction(ActionClass Kind, Action *Input, types::ID Type); JobAction(ActionClass Kind, const ActionList &Inputs, types::ID Type); public: @@ -185,7 +187,7 @@ public: class PreprocessJobAction : public JobAction { void anchor() override; public: - PreprocessJobAction(std::unique_ptr<Action> Input, types::ID OutputType); + PreprocessJobAction(Action *Input, types::ID OutputType); static bool classof(const Action *A) { return A->getKind() == PreprocessJobClass; @@ -195,7 +197,7 @@ public: class PrecompileJobAction : public JobAction { void anchor() override; public: - PrecompileJobAction(std::unique_ptr<Action> Input, types::ID OutputType); + PrecompileJobAction(Action *Input, types::ID OutputType); static bool classof(const Action *A) { return A->getKind() == PrecompileJobClass; @@ -205,7 +207,7 @@ public: class AnalyzeJobAction : public JobAction { void anchor() override; public: - AnalyzeJobAction(std::unique_ptr<Action> Input, types::ID OutputType); + AnalyzeJobAction(Action *Input, types::ID OutputType); static bool classof(const Action *A) { return A->getKind() == AnalyzeJobClass; @@ -215,7 +217,7 @@ public: class MigrateJobAction : public JobAction { void anchor() override; public: - MigrateJobAction(std::unique_ptr<Action> Input, types::ID OutputType); + MigrateJobAction(Action *Input, types::ID OutputType); static bool classof(const Action *A) { return A->getKind() == MigrateJobClass; @@ -225,7 +227,7 @@ public: class CompileJobAction : public JobAction { void anchor() override; public: - CompileJobAction(std::unique_ptr<Action> Input, types::ID OutputType); + CompileJobAction(Action *Input, types::ID OutputType); static bool classof(const Action *A) { return A->getKind() == CompileJobClass; @@ -235,7 +237,7 @@ public: class BackendJobAction : public JobAction { void anchor() override; public: - BackendJobAction(std::unique_ptr<Action> Input, types::ID OutputType); + BackendJobAction(Action *Input, types::ID OutputType); static bool classof(const Action *A) { return A->getKind() == BackendJobClass; @@ -245,7 +247,7 @@ public: class AssembleJobAction : public JobAction { void anchor() override; public: - AssembleJobAction(std::unique_ptr<Action> Input, types::ID OutputType); + AssembleJobAction(Action *Input, types::ID OutputType); static bool classof(const Action *A) { return 
A->getKind() == AssembleJobClass; @@ -285,8 +287,7 @@ public: class VerifyJobAction : public JobAction { void anchor() override; public: - VerifyJobAction(ActionClass Kind, std::unique_ptr<Action> Input, - types::ID Type); + VerifyJobAction(ActionClass Kind, Action *Input, types::ID Type); static bool classof(const Action *A) { return A->getKind() == VerifyDebugInfoJobClass || A->getKind() == VerifyPCHJobClass; @@ -296,7 +297,7 @@ public: class VerifyDebugInfoJobAction : public VerifyJobAction { void anchor() override; public: - VerifyDebugInfoJobAction(std::unique_ptr<Action> Input, types::ID Type); + VerifyDebugInfoJobAction(Action *Input, types::ID Type); static bool classof(const Action *A) { return A->getKind() == VerifyDebugInfoJobClass; } @@ -305,7 +306,7 @@ public: class VerifyPCHJobAction : public VerifyJobAction { void anchor() override; public: - VerifyPCHJobAction(std::unique_ptr<Action> Input, types::ID Type); + VerifyPCHJobAction(Action *Input, types::ID Type); static bool classof(const Action *A) { return A->getKind() == VerifyPCHJobClass; } diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Compilation.h b/contrib/llvm/tools/clang/include/clang/Driver/Compilation.h index 12ff068..3ed1913 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Compilation.h +++ b/contrib/llvm/tools/clang/include/clang/Driver/Compilation.h @@ -48,7 +48,12 @@ class Compilation { /// own argument translation. llvm::opt::DerivedArgList *TranslatedArgs; - /// The list of actions. + /// The list of actions we've created via MakeAction. This is not accessible + /// to consumers; it's here just to manage ownership. + std::vector<std::unique_ptr<Action>> AllActions; + + /// The list of actions. This is maintained and modified by consumers, via + /// getActions(). ActionList Actions; /// The root list of jobs. @@ -105,6 +110,15 @@ public: ActionList &getActions() { return Actions; } const ActionList &getActions() const { return Actions; } + /// Creates a new Action owned by this Compilation. + /// + /// The new Action is *not* added to the list returned by getActions(). + template <typename T, typename... Args> T *MakeAction(Args &&... Arg) { + T *RawPtr = new T(std::forward<Args>(Arg)...); + AllActions.push_back(std::unique_ptr<Action>(RawPtr)); + return RawPtr; + } + JobList &getJobs() { return Jobs; } const JobList &getJobs() const { return Jobs; } diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Driver.h b/contrib/llvm/tools/clang/include/clang/Driver/Driver.h index c9940ba..a229779 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Driver.h +++ b/contrib/llvm/tools/clang/include/clang/Driver/Driver.h @@ -18,10 +18,10 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" -#include "llvm/Support/Path.h" // FIXME: Kill when CompilationInfo -#include <memory> - // lands. +#include "llvm/Support/Path.h" // FIXME: Kill when CompilationInfo lands. + #include <list> +#include <memory> #include <set> #include <string> @@ -375,20 +375,16 @@ public: /// ConstructAction - Construct the appropriate action to do for /// \p Phase on the \p Input, taking into account arguments /// like -fsyntax-only or --analyze.
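The ownership story behind these signature changes is in the Compilation.h hunk above: actions now live in Compilation::AllActions, created through MakeAction<>, so driver code can pass bare Action* around. A small sketch of the resulting usage (wireCompile and the bitcode output type are illustrative, not from the patch):

  // Sketch: create a compile step owned by the Compilation; MakeAction
  // registers ownership only, so top-level actions still have to be
  // pushed onto getActions() explicitly.
  clang::driver::Action *wireCompile(clang::driver::Compilation &C,
                                     clang::driver::Action *Input) {
    auto *Job = C.MakeAction<clang::driver::CompileJobAction>(
        Input, clang::driver::types::TY_LLVM_BC);
    C.getActions().push_back(Job);
    return Job;
  }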
- std::unique_ptr<Action> - ConstructPhaseAction(const ToolChain &TC, const llvm::opt::ArgList &Args, - phases::ID Phase, std::unique_ptr<Action> Input) const; + Action *ConstructPhaseAction(Compilation &C, const ToolChain &TC, + const llvm::opt::ArgList &Args, phases::ID Phase, + Action *Input) const; /// BuildJobsForAction - Construct the jobs to perform for the - /// action \p A. - void BuildJobsForAction(Compilation &C, - const Action *A, - const ToolChain *TC, - const char *BoundArch, - bool AtTopLevel, - bool MultipleArchs, - const char *LinkingOutput, - InputInfo &Result) const; + /// action \p A and return an InputInfo for the result of running \p A. + InputInfo BuildJobsForAction(Compilation &C, const Action *A, + const ToolChain *TC, const char *BoundArch, + bool AtTopLevel, bool MultipleArchs, + const char *LinkingOutput) const; /// Returns the default name for linked images (e.g., "a.out"). const char *getDefaultImageName() const; diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Options.td b/contrib/llvm/tools/clang/include/clang/Driver/Options.td index e219a9b..e4279e8 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Options.td +++ b/contrib/llvm/tools/clang/include/clang/Driver/Options.td @@ -1906,13 +1906,13 @@ def _ : Joined<["--"], "">, Flags<[Unsupported]>; def mieee_rnd_near : Flag<["-"], "mieee-rnd-near">, Group<m_hexagon_Features_Group>; def mv4 : Flag<["-"], "mv4">, Group<m_hexagon_Features_Group>, - Alias<mcpu_EQ>, AliasArgs<["v4"]>; + Alias<mcpu_EQ>, AliasArgs<["hexagonv4"]>; def mv5 : Flag<["-"], "mv5">, Group<m_hexagon_Features_Group>, Alias<mcpu_EQ>, - AliasArgs<["v5"]>; + AliasArgs<["hexagonv5"]>; def mv55 : Flag<["-"], "mv55">, Group<m_hexagon_Features_Group>, - Alias<mcpu_EQ>, AliasArgs<["v55"]>; + Alias<mcpu_EQ>, AliasArgs<["hexagonv55"]>; def mv60 : Flag<["-"], "mv60">, Group<m_hexagon_Features_Group>, - Alias<mcpu_EQ>, AliasArgs<["v60"]>; + Alias<mcpu_EQ>, AliasArgs<["hexagonv60"]>; def mhexagon_hvx : Flag<["-"], "mhvx">, Group<m_hexagon_Features_Group>, Flags<[CC1Option]>, HelpText<"Enable Hexagon Vector eXtensions">; def mno_hexagon_hvx : Flag<["-"], "mno-hvx">, Group<m_hexagon_Features_Group>, diff --git a/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h b/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h index ed73107..7e68d0a 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h +++ b/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h @@ -134,7 +134,7 @@ public: StringRef getOS() const { return Triple.getOSName(); } /// \brief Provide the default architecture name (as expected by -arch) for - /// this toolchain. Note t + /// this toolchain. 
StringRef getDefaultUniversalArchName() const; std::string getTripleString() const { diff --git a/contrib/llvm/tools/clang/include/clang/Lex/LiteralSupport.h b/contrib/llvm/tools/clang/include/clang/Lex/LiteralSupport.h index 5210e3f..d568614 100644 --- a/contrib/llvm/tools/clang/include/clang/Lex/LiteralSupport.h +++ b/contrib/llvm/tools/clang/include/clang/Lex/LiteralSupport.h @@ -166,6 +166,7 @@ public: bool hadError() const { return HadError; } bool isAscii() const { return Kind == tok::char_constant; } bool isWide() const { return Kind == tok::wide_char_constant; } + bool isUTF8() const { return Kind == tok::utf8_char_constant; } bool isUTF16() const { return Kind == tok::utf16_char_constant; } bool isUTF32() const { return Kind == tok::utf32_char_constant; } bool isMultiChar() const { return IsMultiChar; } diff --git a/contrib/llvm/tools/clang/include/clang/Parse/Parser.h b/contrib/llvm/tools/clang/include/clang/Parse/Parser.h index 82b7798..00885a5 100644 --- a/contrib/llvm/tools/clang/include/clang/Parse/Parser.h +++ b/contrib/llvm/tools/clang/include/clang/Parse/Parser.h @@ -502,6 +502,10 @@ private: void HandlePragmaAlign(); /// \brief Handle the annotation token produced for + /// #pragma clang __debug dump... + void HandlePragmaDump(); + + /// \brief Handle the annotation token produced for /// #pragma weak id... void HandlePragmaWeak(); @@ -1640,13 +1644,22 @@ private: /// A SmallVector of types. typedef SmallVector<ParsedType, 12> TypeVector; - StmtResult ParseStatement(SourceLocation *TrailingElseLoc = nullptr); + StmtResult ParseStatement(SourceLocation *TrailingElseLoc = nullptr, + bool AllowOpenMPStandalone = false); + enum AllowedContsructsKind { + /// \brief Allow any declarations, statements, OpenMP directives. + ACK_Any, + /// \brief Allow only statements and non-standalone OpenMP directives. + ACK_StatementsOpenMPNonStandalone, + /// \brief Allow statements and all executable OpenMP directives. + ACK_StatementsOpenMPAnyExecutable + }; StmtResult - ParseStatementOrDeclaration(StmtVector &Stmts, bool OnlyStatement, + ParseStatementOrDeclaration(StmtVector &Stmts, AllowedContsructsKind Allowed, SourceLocation *TrailingElseLoc = nullptr); StmtResult ParseStatementOrDeclarationAfterAttributes( StmtVector &Stmts, - bool OnlyStatement, + AllowedContsructsKind Allowed, SourceLocation *TrailingElseLoc, ParsedAttributesWithRange &Attrs); StmtResult ParseExprStatement(); @@ -1674,7 +1687,8 @@ private: StmtResult ParseReturnStatement(); StmtResult ParseAsmStatement(bool &msAsm); StmtResult ParseMicrosoftAsmStatement(SourceLocation AsmLoc); - StmtResult ParsePragmaLoopHint(StmtVector &Stmts, bool OnlyStatement, + StmtResult ParsePragmaLoopHint(StmtVector &Stmts, + AllowedContsructsKind Allowed, SourceLocation *TrailingElseLoc, ParsedAttributesWithRange &Attrs); @@ -2439,11 +2453,13 @@ private: bool AllowScopeSpecifier); /// \brief Parses declarative or executable directive. /// - /// \param StandAloneAllowed true if allowed stand-alone directives, - /// false - otherwise + /// \param Allowed ACK_Any if any directives are allowed, + /// ACK_StatementsOpenMPAnyExecutable if any executable directives are + /// allowed, ACK_StatementsOpenMPNonStandalone if only non-standalone + /// executable directives are allowed. /// StmtResult - ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed); + ParseOpenMPDeclarativeOrExecutableDirective(AllowedContsructsKind Allowed); /// \brief Parses clause of kind \a CKind for directive of a kind \a Kind.
/// /// \param DKind Kind of current directive. diff --git a/contrib/llvm/tools/clang/include/clang/Sema/DeclSpec.h b/contrib/llvm/tools/clang/include/clang/Sema/DeclSpec.h index e9fdb70..064d37b 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/DeclSpec.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/DeclSpec.h @@ -337,6 +337,7 @@ private: unsigned TypeAltiVecPixel : 1; unsigned TypeAltiVecBool : 1; unsigned TypeSpecOwned : 1; + unsigned TypeSpecPipe : 1; // type-qualifiers unsigned TypeQualifiers : 4; // Bitwise OR of TQ. @@ -385,6 +386,7 @@ private: SourceLocation FS_inlineLoc, FS_virtualLoc, FS_explicitLoc, FS_noreturnLoc; SourceLocation FS_forceinlineLoc; SourceLocation FriendLoc, ModulePrivateLoc, ConstexprLoc, ConceptLoc; + SourceLocation TQ_pipeLoc; WrittenBuiltinSpecs writtenBS; void SaveWrittenBuiltinSpecs(); @@ -420,6 +422,7 @@ public: TypeAltiVecPixel(false), TypeAltiVecBool(false), TypeSpecOwned(false), + TypeSpecPipe(false), TypeQualifiers(TQ_unspecified), FS_inline_specified(false), FS_forceinline_specified(false), @@ -473,6 +476,7 @@ public: bool isTypeAltiVecBool() const { return TypeAltiVecBool; } bool isTypeSpecOwned() const { return TypeSpecOwned; } bool isTypeRep() const { return isTypeRep((TST) TypeSpecType); } + bool isTypeSpecPipe() const { return TypeSpecPipe; } ParsedType getRepAsType() const { assert(isTypeRep((TST) TypeSpecType) && "DeclSpec does not store a type"); @@ -532,6 +536,7 @@ public: SourceLocation getRestrictSpecLoc() const { return TQ_restrictLoc; } SourceLocation getVolatileSpecLoc() const { return TQ_volatileLoc; } SourceLocation getAtomicSpecLoc() const { return TQ_atomicLoc; } + SourceLocation getPipeLoc() const { return TQ_pipeLoc; } /// \brief Clear out all of the type qualifiers. void ClearTypeQualifiers() { @@ -540,6 +545,7 @@ public: TQ_restrictLoc = SourceLocation(); TQ_volatileLoc = SourceLocation(); TQ_atomicLoc = SourceLocation(); + TQ_pipeLoc = SourceLocation(); } // function-specifier @@ -643,6 +649,9 @@ public: bool SetTypeAltiVecBool(bool isAltiVecBool, SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID, const PrintingPolicy &Policy); + bool SetTypePipe(bool isPipe, SourceLocation Loc, + const char *&PrevSpec, unsigned &DiagID, + const PrintingPolicy &Policy); bool SetTypeSpecError(); void UpdateDeclRep(Decl *Rep) { assert(isDeclRep((TST) TypeSpecType)); @@ -1081,7 +1090,7 @@ typedef SmallVector<Token, 4> CachedTokens; /// This is intended to be a small value object. struct DeclaratorChunk { enum { - Pointer, Reference, Array, Function, BlockPointer, MemberPointer, Paren + Pointer, Reference, Array, Function, BlockPointer, MemberPointer, Paren, Pipe } Kind; /// Loc - The place where this type was defined. @@ -1409,6 +1418,13 @@ struct DeclaratorChunk { } }; + struct PipeTypeInfo : TypeInfoCommon { + /// The access writes. + unsigned AccessWrites : 3; + + void destroy() {} + }; + union { TypeInfoCommon Common; PointerTypeInfo Ptr; @@ -1417,6 +1433,7 @@ struct DeclaratorChunk { FunctionTypeInfo Fun; BlockPointerTypeInfo Cls; MemberPointerTypeInfo Mem; + PipeTypeInfo PipeInfo; }; void destroy() { @@ -1428,6 +1445,7 @@ struct DeclaratorChunk { case DeclaratorChunk::Array: return Arr.destroy(); case DeclaratorChunk::MemberPointer: return Mem.destroy(); case DeclaratorChunk::Paren: return; + case DeclaratorChunk::Pipe: return PipeInfo.destroy(); } } @@ -1526,6 +1544,17 @@ struct DeclaratorChunk { return I; } + /// \brief Return a DeclaratorChunk for a pipe.
+ static DeclaratorChunk getPipe(unsigned TypeQuals, + SourceLocation Loc) { + DeclaratorChunk I; + I.Kind = Pipe; + I.Loc = Loc; + I.Cls.TypeQuals = TypeQuals; + I.Cls.AttrList = 0; + return I; + } + static DeclaratorChunk getMemberPointer(const CXXScopeSpec &SS, unsigned TypeQuals, SourceLocation Loc) { @@ -2026,6 +2055,7 @@ public: case DeclaratorChunk::Array: case DeclaratorChunk::BlockPointer: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: return false; } llvm_unreachable("Invalid type chunk"); diff --git a/contrib/llvm/tools/clang/include/clang/Sema/Lookup.h b/contrib/llvm/tools/clang/include/clang/Sema/Lookup.h index 87c40f0..7efb19f 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/Lookup.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/Lookup.h @@ -515,6 +515,7 @@ public: configure(); } + void dump(); void print(raw_ostream &); /// Suppress the diagnostics that would normally fire because of this diff --git a/contrib/llvm/tools/clang/include/clang/Sema/Overload.h b/contrib/llvm/tools/clang/include/clang/Sema/Overload.h index 20958b0..6243795 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/Overload.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/Overload.h @@ -570,8 +570,8 @@ namespace clang { /// This conversion candidate is not viable because its result /// type is not implicitly convertible to the desired type. ovl_fail_bad_final_conversion, - - /// This conversion function template specialization candidate is not + + /// This conversion function template specialization candidate is not /// viable because the final conversion was not an exact match. ovl_fail_final_conversion_not_exact, @@ -582,7 +582,10 @@ namespace clang { /// This candidate function was not viable because an enable_if /// attribute disabled it. - ovl_fail_enable_if + ovl_fail_enable_if, + + /// This candidate was not viable because its address could not be taken. + ovl_fail_addr_not_available }; /// OverloadCandidate - A single candidate in an overload set (C++ 13.3). 
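The Parser.h hunks above replace the old bool StandAloneAllowed with the three-valued AllowedContsructsKind: stand-alone OpenMP directives such as #pragma omp barrier own no associated statement, so they may not appear where exactly one statement is required (for instance as the unbraced body of an if). A minimal sketch of the gate this enables, using assumed stand-in types rather than clang's real parser interfaces:

    // Sketch only; names are stand-ins, not clang's Parser API.
    enum AllowedConstructsKind {
      ACK_Any,                           // declarations, statements, any directive
      ACK_StatementsOpenMPNonStandalone, // e.g. the unbraced body of an if
      ACK_StatementsOpenMPAnyExecutable  // e.g. a statement in a compound block
    };

    // Assumed toy predicate: directives with no associated statement,
    // such as "#pragma omp barrier" or "#pragma omp taskwait".
    bool isStandaloneDirective(int DKind) { return DKind == 0; }

    // Stand-alone directives are rejected in positions that require a real
    // statement, which is what the new parameter lets callers express.
    bool directiveAllowedHere(int DKind, AllowedConstructsKind Allowed) {
      return !(Allowed == ACK_StatementsOpenMPNonStandalone &&
               isStandaloneDirective(DKind));
    }

The DeclSpec.h hunks above are the parser-side plumbing for the OpenCL 2.0 pipe specifier (as in void foo(read_only pipe int p)). Per the Itanium rule quoted later in this diff, every such parameter mangles to the single token 8ocl_pipe, so a toy mangler suffices to predict the symbol; the function below is illustrative only:

    #include <iostream>
    #include <string>

    // Toy restatement of "<type> ::= 8ocl_pipe": the element type and access
    // qualifier do not participate in the mangled name under this rule alone.
    std::string mangleFunction(const std::string &Name,
                               const std::string &ParamTypes) {
      return "_Z" + std::to_string(Name.size()) + Name + ParamTypes;
    }

    int main() {
      std::cout << mangleFunction("foo", "8ocl_pipe") << "\n"; // _Z3foo8ocl_pipe
    }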
diff --git a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h index 77d06f2..ffe1ff3 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h @@ -1269,6 +1269,8 @@ public: SourceLocation Loc, DeclarationName Entity); QualType BuildParenType(QualType T); QualType BuildAtomicType(QualType T, SourceLocation Loc); + QualType BuildPipeType(QualType T, + SourceLocation Loc); TypeSourceInfo *GetTypeForDeclarator(Declarator &D, Scope *S); TypeSourceInfo *GetTypeForDeclaratorCast(Declarator &D, QualType FromTy); @@ -2548,7 +2550,8 @@ public: MultiExprArg Args, SourceLocation RParenLoc, Expr *ExecConfig, - bool AllowTypoCorrection=true); + bool AllowTypoCorrection=true, + bool CalleesAddressIsTaken=false); bool buildOverloadedCallSet(Scope *S, Expr *Fn, UnresolvedLookupExpr *ULE, MultiExprArg Args, SourceLocation RParenLoc, @@ -7626,6 +7629,9 @@ public: void ActOnPragmaMSInitSeg(SourceLocation PragmaLocation, StringLiteral *SegmentName); + /// \brief Called on #pragma clang __debug dump II + void ActOnPragmaDump(Scope *S, SourceLocation Loc, IdentifierInfo *II); + /// ActOnPragmaDetectMismatch - Call on well-formed \#pragma detect_mismatch void ActOnPragmaDetectMismatch(StringRef Name, StringRef Value); @@ -8583,6 +8589,10 @@ public: bool CheckVectorCast(SourceRange R, QualType VectorTy, QualType Ty, CastKind &Kind); + /// \brief Prepare `SplattedExpr` for a vector splat operation, adding + /// implicit casts if necessary. + ExprResult prepareVectorSplat(QualType VectorTy, Expr *SplattedExpr); + // CheckExtVectorCast - check type constraints for extended vectors. // Since vectors are an extension, there are no C standard reference for this. // We allow casting between vectors and integer datatypes of the same size, diff --git a/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h b/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h index 16bda6e..0dfb8cf 100644 --- a/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h +++ b/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h @@ -907,7 +907,9 @@ namespace clang { /// \brief A DecayedType record. TYPE_DECAYED = 41, /// \brief An AdjustedType record. - TYPE_ADJUSTED = 42 + TYPE_ADJUSTED = 42, + /// \brief A PipeType record. 
+ TYPE_PIPE = 43 }; /// \brief The type IDs for special types constructed by semantic diff --git a/contrib/llvm/tools/clang/include/clang/Serialization/ASTWriter.h b/contrib/llvm/tools/clang/include/clang/Serialization/ASTWriter.h index ed34547..ef8c653 100644 --- a/contrib/llvm/tools/clang/include/clang/Serialization/ASTWriter.h +++ b/contrib/llvm/tools/clang/include/clang/Serialization/ASTWriter.h @@ -871,6 +871,7 @@ public: const FunctionDecl *Delete) override; void CompletedImplicitDefinition(const FunctionDecl *D) override; void StaticDataMemberInstantiated(const VarDecl *D) override; + void DefaultArgumentInstantiated(const ParmVarDecl *D) override; void FunctionDefinitionInstantiated(const FunctionDecl *D) override; void AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD, const ObjCInterfaceDecl *IFD) override; diff --git a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h index bb835c4..43f6e5c 100644 --- a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h +++ b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h @@ -80,7 +80,7 @@ class MemRegion : public llvm::FoldingSetNode { public: enum Kind { // Memory spaces. - GenericMemSpaceRegionKind, + CodeSpaceRegionKind, StackLocalsSpaceRegionKind, StackArgumentsSpaceRegionKind, HeapSpaceRegionKind, @@ -89,29 +89,29 @@ public: GlobalInternalSpaceRegionKind, GlobalSystemSpaceRegionKind, GlobalImmutableSpaceRegionKind, - BEG_NON_STATIC_GLOBAL_MEMSPACES = GlobalInternalSpaceRegionKind, + BEGIN_NON_STATIC_GLOBAL_MEMSPACES = GlobalInternalSpaceRegionKind, END_NON_STATIC_GLOBAL_MEMSPACES = GlobalImmutableSpaceRegionKind, - BEG_GLOBAL_MEMSPACES = StaticGlobalSpaceRegionKind, + BEGIN_GLOBAL_MEMSPACES = StaticGlobalSpaceRegionKind, END_GLOBAL_MEMSPACES = GlobalImmutableSpaceRegionKind, - BEG_MEMSPACES = GenericMemSpaceRegionKind, + BEGIN_MEMSPACES = CodeSpaceRegionKind, END_MEMSPACES = GlobalImmutableSpaceRegionKind, // Untyped regions. SymbolicRegionKind, AllocaRegionKind, // Typed regions. - BEG_TYPED_REGIONS, - FunctionTextRegionKind = BEG_TYPED_REGIONS, - BlockTextRegionKind, + BEGIN_TYPED_REGIONS, + FunctionCodeRegionKind = BEGIN_TYPED_REGIONS, + BlockCodeRegionKind, BlockDataRegionKind, - BEG_TYPED_VALUE_REGIONS, - CompoundLiteralRegionKind = BEG_TYPED_VALUE_REGIONS, + BEGIN_TYPED_VALUE_REGIONS, + CompoundLiteralRegionKind = BEGIN_TYPED_VALUE_REGIONS, CXXThisRegionKind, StringRegionKind, ObjCStringRegionKind, ElementRegionKind, // Decl Regions. - BEG_DECL_REGIONS, - VarRegionKind = BEG_DECL_REGIONS, + BEGIN_DECL_REGIONS, + VarRegionKind = BEGIN_DECL_REGIONS, FieldRegionKind, ObjCIvarRegionKind, END_DECL_REGIONS = ObjCIvarRegionKind, @@ -193,12 +193,9 @@ public: /// for example, the set of global variables, the stack frame, etc. class MemSpaceRegion : public MemRegion { protected: - friend class MemRegionManager; - MemRegionManager *Mgr; - MemSpaceRegion(MemRegionManager *mgr, Kind k = GenericMemSpaceRegionKind) - : MemRegion(k), Mgr(mgr) { + MemSpaceRegion(MemRegionManager *mgr, Kind k) : MemRegion(k), Mgr(mgr) { assert(classof(this)); } @@ -211,10 +208,26 @@ public: static bool classof(const MemRegion *R) { Kind k = R->getKind(); - return k >= BEG_MEMSPACES && k <= END_MEMSPACES; + return k >= BEGIN_MEMSPACES && k <= END_MEMSPACES; } }; - + +/// CodeSpaceRegion - The memory space that holds the executable code of +/// functions and blocks. 
+class CodeSpaceRegion : public MemSpaceRegion { + friend class MemRegionManager; + + CodeSpaceRegion(MemRegionManager *mgr) + : MemSpaceRegion(mgr, CodeSpaceRegionKind) {} + +public: + void dumpToStream(raw_ostream &os) const override; + + static bool classof(const MemRegion *R) { + return R->getKind() == CodeSpaceRegionKind; + } +}; + class GlobalsSpaceRegion : public MemSpaceRegion { virtual void anchor(); protected: @@ -223,7 +236,7 @@ protected: public: static bool classof(const MemRegion *R) { Kind k = R->getKind(); - return k >= BEG_GLOBAL_MEMSPACES && k <= END_GLOBAL_MEMSPACES; + return k >= BEGIN_GLOBAL_MEMSPACES && k <= END_GLOBAL_MEMSPACES; } }; @@ -259,17 +272,15 @@ public: /// RegionStoreManager::invalidateRegions (instead of finding all the dependent /// globals, we invalidate the whole parent region). class NonStaticGlobalSpaceRegion : public GlobalsSpaceRegion { - friend class MemRegionManager; - protected: NonStaticGlobalSpaceRegion(MemRegionManager *mgr, Kind k) : GlobalsSpaceRegion(mgr, k) {} - + public: static bool classof(const MemRegion *R) { Kind k = R->getKind(); - return k >= BEG_NON_STATIC_GLOBAL_MEMSPACES && + return k >= BEGIN_NON_STATIC_GLOBAL_MEMSPACES && k <= END_NON_STATIC_GLOBAL_MEMSPACES; } }; @@ -357,7 +368,7 @@ public: return R->getKind() == UnknownSpaceRegionKind; } }; - + class StackSpaceRegion : public MemSpaceRegion { private: const StackFrameContext *SFC; @@ -368,18 +379,18 @@ protected: assert(classof(this)); } -public: +public: const StackFrameContext *getStackFrame() const { return SFC; } - + void Profile(llvm::FoldingSetNodeID &ID) const override; static bool classof(const MemRegion *R) { Kind k = R->getKind(); return k >= StackLocalsSpaceRegionKind && k <= StackArgumentsSpaceRegionKind; - } + } }; - + class StackLocalsSpaceRegion : public StackSpaceRegion { virtual void anchor(); friend class MemRegionManager; @@ -491,7 +502,7 @@ public: static bool classof(const MemRegion* R) { unsigned k = R->getKind(); - return k >= BEG_TYPED_REGIONS && k <= END_TYPED_REGIONS; + return k >= BEGIN_TYPED_REGIONS && k <= END_TYPED_REGIONS; } }; @@ -523,7 +534,7 @@ public: static bool classof(const MemRegion* R) { unsigned k = R->getKind(); - return k >= BEG_TYPED_VALUE_REGIONS && k <= END_TYPED_VALUE_REGIONS; + return k >= BEGIN_TYPED_VALUE_REGIONS && k <= END_TYPED_VALUE_REGIONS; } }; @@ -538,16 +549,16 @@ public: static bool classof(const MemRegion* R) { Kind k = R->getKind(); - return k >= FunctionTextRegionKind && k <= BlockTextRegionKind; + return k >= FunctionCodeRegionKind && k <= BlockCodeRegionKind; } }; -/// FunctionTextRegion - A region that represents code texts of function. -class FunctionTextRegion : public CodeTextRegion { +/// FunctionCodeRegion - A region that represents code texts of function. +class FunctionCodeRegion : public CodeTextRegion { const NamedDecl *FD; public: - FunctionTextRegion(const NamedDecl *fd, const MemRegion* sreg) - : CodeTextRegion(sreg, FunctionTextRegionKind), FD(fd) { + FunctionCodeRegion(const NamedDecl *fd, const MemRegion* sreg) + : CodeTextRegion(sreg, FunctionCodeRegionKind), FD(fd) { assert(isa<ObjCMethodDecl>(fd) || isa<FunctionDecl>(fd)); } @@ -577,27 +588,27 @@ public: const MemRegion*); static bool classof(const MemRegion* R) { - return R->getKind() == FunctionTextRegionKind; + return R->getKind() == FunctionCodeRegionKind; } }; -/// BlockTextRegion - A region that represents code texts of blocks (closures). -/// Blocks are represented with two kinds of regions. 
BlockTextRegions +/// BlockCodeRegion - A region that represents code texts of blocks (closures). +/// Blocks are represented with two kinds of regions. BlockCodeRegions /// represent the "code", while BlockDataRegions represent instances of blocks, /// which correspond to "code+data". The distinction is important, because /// like a closure a block captures the values of externally referenced /// variables. -class BlockTextRegion : public CodeTextRegion { +class BlockCodeRegion : public CodeTextRegion { friend class MemRegionManager; const BlockDecl *BD; AnalysisDeclContext *AC; CanQualType locTy; - BlockTextRegion(const BlockDecl *bd, CanQualType lTy, + BlockCodeRegion(const BlockDecl *bd, CanQualType lTy, AnalysisDeclContext *ac, const MemRegion* sreg) - : CodeTextRegion(sreg, BlockTextRegionKind), BD(bd), AC(ac), locTy(lTy) {} + : CodeTextRegion(sreg, BlockCodeRegionKind), BD(bd), AC(ac), locTy(lTy) {} public: QualType getLocationType() const override { @@ -619,32 +630,32 @@ public: const MemRegion*); static bool classof(const MemRegion* R) { - return R->getKind() == BlockTextRegionKind; + return R->getKind() == BlockCodeRegionKind; } }; /// BlockDataRegion - A region that represents a block instance. -/// Blocks are represented with two kinds of regions. BlockTextRegions +/// Blocks are represented with two kinds of regions. BlockCodeRegions /// represent the "code", while BlockDataRegions represent instances of blocks, /// which correspond to "code+data". The distinction is important, because /// like a closure a block captures the values of externally referenced /// variables. class BlockDataRegion : public TypedRegion { friend class MemRegionManager; - const BlockTextRegion *BC; + const BlockCodeRegion *BC; const LocationContext *LC; // Can be null */ unsigned BlockCount; void *ReferencedVars; void *OriginalVars; - BlockDataRegion(const BlockTextRegion *bc, const LocationContext *lc, + BlockDataRegion(const BlockCodeRegion *bc, const LocationContext *lc, unsigned count, const MemRegion *sreg) : TypedRegion(sreg, BlockDataRegionKind), BC(bc), LC(lc), BlockCount(count), ReferencedVars(nullptr), OriginalVars(nullptr) {} public: - const BlockTextRegion *getCodeRegion() const { return BC; } + const BlockCodeRegion *getCodeRegion() const { return BC; } const BlockDecl *getDecl() const { return BC->getDecl(); } @@ -691,7 +702,7 @@ public: void Profile(llvm::FoldingSetNodeID& ID) const override; - static void ProfileRegion(llvm::FoldingSetNodeID&, const BlockTextRegion *, + static void ProfileRegion(llvm::FoldingSetNodeID&, const BlockCodeRegion *, const LocationContext *, unsigned, const MemRegion *); @@ -856,7 +867,7 @@ public: static bool classof(const MemRegion* R) { unsigned k = R->getKind(); - return k >= BEG_DECL_REGIONS && k <= END_DECL_REGIONS; + return k >= BEGIN_DECL_REGIONS && k <= END_DECL_REGIONS; } }; @@ -1138,7 +1149,7 @@ class MemRegionManager { HeapSpaceRegion *heap; UnknownSpaceRegion *unknown; - MemSpaceRegion *code; + CodeSpaceRegion *code; public: MemRegionManager(ASTContext &c, llvm::BumpPtrAllocator &a) @@ -1174,9 +1185,9 @@ public: /// getUnknownRegion - Retrieve the memory region associated with unknown /// memory space. - const MemSpaceRegion *getUnknownRegion(); + const UnknownSpaceRegion *getUnknownRegion(); - const MemSpaceRegion *getCodeRegion(); + const CodeSpaceRegion *getCodeRegion(); /// getAllocaRegion - Retrieve a region associated with a call to alloca(). 
const AllocaRegion *getAllocaRegion(const Expr *Ex, unsigned Cnt, @@ -1262,8 +1273,8 @@ public: baseReg->isVirtual()); } - const FunctionTextRegion *getFunctionTextRegion(const NamedDecl *FD); - const BlockTextRegion *getBlockTextRegion(const BlockDecl *BD, + const FunctionCodeRegion *getFunctionCodeRegion(const NamedDecl *FD); + const BlockCodeRegion *getBlockCodeRegion(const BlockDecl *BD, CanQualType locTy, AnalysisDeclContext *AC); @@ -1271,7 +1282,7 @@ public: /// of a block. Unlike many other MemRegions, the LocationContext* /// argument is allowed to be NULL for cases where we have no known /// context. - const BlockDataRegion *getBlockDataRegion(const BlockTextRegion *bc, + const BlockDataRegion *getBlockDataRegion(const BlockCodeRegion *bc, const LocationContext *lc, unsigned blockCount); diff --git a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h index a68d341..3c47114 100644 --- a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h +++ b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h @@ -83,7 +83,11 @@ public: } SVal evalCast(SVal val, QualType castTy, QualType originalType); - + + // Handles casts of type CK_IntegralCast. + SVal evalIntegralCast(ProgramStateRef state, SVal val, QualType castTy, + QualType originalType); + virtual SVal evalMinus(NonLoc val) = 0; virtual SVal evalComplement(NonLoc val) = 0; diff --git a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index 642e11a..d644254 100644 --- a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -45,8 +45,8 @@ class SVal { public: enum BaseKind { // The enumerators must be representable using 2 bits. 
- UndefinedKind = 0, // for subclass UndefinedVal (an uninitialized value) - UnknownKind = 1, // for subclass UnknownVal (a void value) + UndefinedValKind = 0, // for subclass UndefinedVal (an uninitialized value) + UnknownValKind = 1, // for subclass UnknownVal (a void value) LocKind = 2, // for subclass Loc (an L-value) NonLocKind = 3 // for subclass NonLoc (an R-value that's not // an L-value) @@ -115,19 +115,19 @@ public: } inline bool isUnknown() const { - return getRawKind() == UnknownKind; + return getRawKind() == UnknownValKind; } inline bool isUndef() const { - return getRawKind() == UndefinedKind; + return getRawKind() == UndefinedValKind; } inline bool isUnknownOrUndef() const { - return getRawKind() <= UnknownKind; + return getRawKind() <= UnknownValKind; } inline bool isValid() const { - return getRawKind() > UnknownKind; + return getRawKind() > UnknownValKind; } bool isConstant() const; @@ -190,12 +190,12 @@ public: class UndefinedVal : public SVal { public: - UndefinedVal() : SVal(UndefinedKind) {} + UndefinedVal() : SVal(UndefinedValKind) {} private: friend class SVal; static bool isKind(const SVal& V) { - return V.getBaseKind() == UndefinedKind; + return V.getBaseKind() == UndefinedValKind; } }; @@ -223,12 +223,12 @@ private: class UnknownVal : public DefinedOrUnknownSVal { public: - explicit UnknownVal() : DefinedOrUnknownSVal(UnknownKind) {} + explicit UnknownVal() : DefinedOrUnknownSVal(UnknownValKind) {} private: friend class SVal; static bool isKind(const SVal &V) { - return V.getBaseKind() == UnknownKind; + return V.getBaseKind() == UnknownValKind; } }; @@ -465,7 +465,7 @@ private: namespace loc { -enum Kind { GotoLabelKind, MemRegionKind, ConcreteIntKind }; +enum Kind { GotoLabelKind, MemRegionValKind, ConcreteIntKind }; class GotoLabel : public Loc { public: @@ -490,7 +490,7 @@ private: class MemRegionVal : public Loc { public: - explicit MemRegionVal(const MemRegion* r) : Loc(MemRegionKind, r) {} + explicit MemRegionVal(const MemRegion* r) : Loc(MemRegionValKind, r) {} /// \brief Get the underlying region.
const MemRegion* getRegion() const { @@ -518,11 +518,11 @@ private: MemRegionVal() {} static bool isKind(const SVal& V) { return V.getBaseKind() == LocKind && - V.getSubKind() == MemRegionKind; + V.getSubKind() == MemRegionValKind; } static bool isKind(const Loc& V) { - return V.getSubKind() == MemRegionKind; + return V.getSubKind() == MemRegionValKind; } }; diff --git a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h index 9dbfab2..77d12e5 100644 --- a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -42,14 +42,22 @@ namespace ento { class SymExpr : public llvm::FoldingSetNode { virtual void anchor(); public: - enum Kind { RegionValueKind, ConjuredKind, DerivedKind, ExtentKind, - MetadataKind, - BEGIN_SYMBOLS = RegionValueKind, - END_SYMBOLS = MetadataKind, - SymIntKind, IntSymKind, SymSymKind, - BEGIN_BINARYSYMEXPRS = SymIntKind, - END_BINARYSYMEXPRS = SymSymKind, - CastSymbolKind }; + enum Kind { + SymbolRegionValueKind, + SymbolConjuredKind, + SymbolDerivedKind, + SymbolExtentKind, + SymbolMetadataKind, + BEGIN_SYMBOLS = SymbolRegionValueKind, + END_SYMBOLS = SymbolMetadataKind, + SymIntExprKind, + IntSymExprKind, + SymSymExprKind, + BEGIN_BINARYSYMEXPRS = SymIntExprKind, + END_BINARYSYMEXPRS = SymSymExprKind, + SymbolCastKind + }; + private: Kind K; @@ -126,12 +134,12 @@ class SymbolRegionValue : public SymbolData { public: SymbolRegionValue(SymbolID sym, const TypedValueRegion *r) - : SymbolData(RegionValueKind, sym), R(r) {} + : SymbolData(SymbolRegionValueKind, sym), R(r) {} const TypedValueRegion* getRegion() const { return R; } static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) { - profile.AddInteger((unsigned) RegionValueKind); + profile.AddInteger((unsigned) SymbolRegionValueKind); profile.AddPointer(R); } @@ -145,7 +153,7 @@ public: // Implement isa<T> support. static inline bool classof(const SymExpr *SE) { - return SE->getKind() == RegionValueKind; + return SE->getKind() == SymbolRegionValueKind; } }; @@ -160,11 +168,9 @@ class SymbolConjured : public SymbolData { public: SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx, - QualType t, unsigned count, - const void *symbolTag) - : SymbolData(ConjuredKind, sym), S(s), T(t), Count(count), - LCtx(lctx), - SymbolTag(symbolTag) {} + QualType t, unsigned count, const void *symbolTag) + : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count), + LCtx(lctx), SymbolTag(symbolTag) {} const Stmt *getStmt() const { return S; } unsigned getCount() const { return Count; } @@ -177,7 +183,7 @@ public: static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S, QualType T, unsigned Count, const LocationContext *LCtx, const void *SymbolTag) { - profile.AddInteger((unsigned) ConjuredKind); + profile.AddInteger((unsigned) SymbolConjuredKind); profile.AddPointer(S); profile.AddPointer(LCtx); profile.Add(T); @@ -191,7 +197,7 @@ public: // Implement isa<T> support. 
static inline bool classof(const SymExpr *SE) { - return SE->getKind() == ConjuredKind; + return SE->getKind() == SymbolConjuredKind; } }; @@ -203,7 +209,7 @@ class SymbolDerived : public SymbolData { public: SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r) - : SymbolData(DerivedKind, sym), parentSymbol(parent), R(r) {} + : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) {} SymbolRef getParentSymbol() const { return parentSymbol; } const TypedValueRegion *getRegion() const { return R; } @@ -214,7 +220,7 @@ public: static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent, const TypedValueRegion *r) { - profile.AddInteger((unsigned) DerivedKind); + profile.AddInteger((unsigned) SymbolDerivedKind); profile.AddPointer(r); profile.AddPointer(parent); } @@ -225,7 +231,7 @@ public: // Implement isa<T> support. static inline bool classof(const SymExpr *SE) { - return SE->getKind() == DerivedKind; + return SE->getKind() == SymbolDerivedKind; } }; @@ -237,7 +243,7 @@ class SymbolExtent : public SymbolData { public: SymbolExtent(SymbolID sym, const SubRegion *r) - : SymbolData(ExtentKind, sym), R(r) {} + : SymbolData(SymbolExtentKind, sym), R(r) {} const SubRegion *getRegion() const { return R; } @@ -246,7 +252,7 @@ public: void dumpToStream(raw_ostream &os) const override; static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) { - profile.AddInteger((unsigned) ExtentKind); + profile.AddInteger((unsigned) SymbolExtentKind); profile.AddPointer(R); } @@ -256,7 +262,7 @@ public: // Implement isa<T> support. static inline bool classof(const SymExpr *SE) { - return SE->getKind() == ExtentKind; + return SE->getKind() == SymbolExtentKind; } }; @@ -273,7 +279,7 @@ class SymbolMetadata : public SymbolData { public: SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t, unsigned count, const void *tag) - : SymbolData(MetadataKind, sym), R(r), S(s), T(t), Count(count), Tag(tag) {} + : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), Count(count), Tag(tag) {} const MemRegion *getRegion() const { return R; } const Stmt *getStmt() const { return S; } @@ -287,7 +293,7 @@ public: static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R, const Stmt *S, QualType T, unsigned Count, const void *Tag) { - profile.AddInteger((unsigned) MetadataKind); + profile.AddInteger((unsigned) SymbolMetadataKind); profile.AddPointer(R); profile.AddPointer(S); profile.Add(T); @@ -301,7 +307,7 @@ public: // Implement isa<T> support. static inline bool classof(const SymExpr *SE) { - return SE->getKind() == MetadataKind; + return SE->getKind() == SymbolMetadataKind; } }; @@ -315,7 +321,7 @@ class SymbolCast : public SymExpr { public: SymbolCast(const SymExpr *In, QualType From, QualType To) : - SymExpr(CastSymbolKind), Operand(In), FromTy(From), ToTy(To) { } + SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) { } QualType getType() const override { return ToTy; } @@ -325,7 +331,7 @@ public: static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *In, QualType From, QualType To) { - ID.AddInteger((unsigned) CastSymbolKind); + ID.AddInteger((unsigned) SymbolCastKind); ID.AddPointer(In); ID.Add(From); ID.Add(To); @@ -337,7 +343,7 @@ public: // Implement isa<T> support. 
static inline bool classof(const SymExpr *SE) { - return SE->getKind() == CastSymbolKind; + return SE->getKind() == SymbolCastKind; } }; @@ -372,7 +378,7 @@ class SymIntExpr : public BinarySymExpr { public: SymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const llvm::APSInt& rhs, QualType t) - : BinarySymExpr(SymIntKind, op, t), LHS(lhs), RHS(rhs) {} + : BinarySymExpr(SymIntExprKind, op, t), LHS(lhs), RHS(rhs) {} void dumpToStream(raw_ostream &os) const override; @@ -382,7 +388,7 @@ public: static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs, BinaryOperator::Opcode op, const llvm::APSInt& rhs, QualType t) { - ID.AddInteger((unsigned) SymIntKind); + ID.AddInteger((unsigned) SymIntExprKind); ID.AddPointer(lhs); ID.AddInteger(op); ID.AddPointer(&rhs); @@ -395,7 +401,7 @@ public: // Implement isa<T> support. static inline bool classof(const SymExpr *SE) { - return SE->getKind() == SymIntKind; + return SE->getKind() == SymIntExprKind; } }; @@ -407,7 +413,7 @@ class IntSymExpr : public BinarySymExpr { public: IntSymExpr(const llvm::APSInt& lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) - : BinarySymExpr(IntSymKind, op, t), LHS(lhs), RHS(rhs) {} + : BinarySymExpr(IntSymExprKind, op, t), LHS(lhs), RHS(rhs) {} void dumpToStream(raw_ostream &os) const override; @@ -417,7 +423,7 @@ public: static void Profile(llvm::FoldingSetNodeID& ID, const llvm::APSInt& lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) { - ID.AddInteger((unsigned) IntSymKind); + ID.AddInteger((unsigned) IntSymExprKind); ID.AddPointer(&lhs); ID.AddInteger(op); ID.AddPointer(rhs); @@ -430,7 +436,7 @@ public: // Implement isa<T> support. static inline bool classof(const SymExpr *SE) { - return SE->getKind() == IntSymKind; + return SE->getKind() == IntSymExprKind; } }; @@ -442,7 +448,7 @@ class SymSymExpr : public BinarySymExpr { public: SymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) - : BinarySymExpr(SymSymKind, op, t), LHS(lhs), RHS(rhs) {} + : BinarySymExpr(SymSymExprKind, op, t), LHS(lhs), RHS(rhs) {} const SymExpr *getLHS() const { return LHS; } const SymExpr *getRHS() const { return RHS; } @@ -451,7 +457,7 @@ public: static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) { - ID.AddInteger((unsigned) SymSymKind); + ID.AddInteger((unsigned) SymSymExprKind); ID.AddPointer(lhs); ID.AddInteger(op); ID.AddPointer(rhs); @@ -464,7 +470,7 @@ public: // Implement isa<T> support. static inline bool classof(const SymExpr *SE) { - return SE->getKind() == SymSymKind; + return SE->getKind() == SymSymExprKind; } }; diff --git a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp index d4abbe4..6438696 100644 --- a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp @@ -1836,6 +1836,13 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { Align = static_cast<unsigned>(Width); } } + break; + + case Type::Pipe: { + TypeInfo Info = getTypeInfo(cast<PipeType>(T)->getElementType()); + Width = Info.Width; + Align = Info.Align; + } } @@ -2663,6 +2670,7 @@ QualType ASTContext::getVariableArrayDecayedType(QualType type) const { case Type::FunctionProto: case Type::BlockPointer: case Type::MemberPointer: + case Type::Pipe: return type; // These types can be variably-modified. 
All these modifications @@ -3117,6 +3125,32 @@ ASTContext::getFunctionType(QualType ResultTy, ArrayRef<QualType> ArgArray, return QualType(FTP, 0); } +/// Return pipe type for the specified type. +QualType ASTContext::getPipeType(QualType T) const { + llvm::FoldingSetNodeID ID; + PipeType::Profile(ID, T); + + void *InsertPos = 0; + if (PipeType *PT = PipeTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(PT, 0); + + // If the pipe element type isn't canonical, this won't be a canonical type + // either, so fill in the canonical type field. + QualType Canonical; + if (!T.isCanonical()) { + Canonical = getPipeType(getCanonicalType(T)); + + // Get the new insert position for the node we care about. + PipeType *NewIP = PipeTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(!NewIP && "Shouldn't be in the map!"); + (void)NewIP; + } + PipeType *New = new (*this, TypeAlignment) PipeType(T, Canonical); + Types.push_back(New); + PipeTypes.InsertNode(New, InsertPos); + return QualType(New, 0); +} + #ifndef NDEBUG static bool NeedsInjectedClassNameType(const RecordDecl *D) { if (!isa<CXXRecordDecl>(D)) return false; @@ -5857,6 +5891,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string& S, case Type::Auto: return; + case Type::Pipe: #define ABSTRACT_TYPE(KIND, BASE) #define TYPE(KIND, BASE) #define DEPENDENT_TYPE(KIND, BASE) \ @@ -7792,6 +7827,24 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, return QualType(); } + case Type::Pipe: + { + // Merge two pipe types, while trying to preserve typedef info + QualType LHSValue = LHS->getAs<PipeType>()->getElementType(); + QualType RHSValue = RHS->getAs<PipeType>()->getElementType(); + if (Unqualified) { + LHSValue = LHSValue.getUnqualifiedType(); + RHSValue = RHSValue.getUnqualifiedType(); + } + QualType ResultType = mergeTypes(LHSValue, RHSValue, false, + Unqualified); + if (ResultType.isNull()) return QualType(); + if (getCanonicalType(LHSValue) == getCanonicalType(ResultType)) + return LHS; + if (getCanonicalType(RHSValue) == getCanonicalType(ResultType)) + return RHS; + return getPipeType(ResultType); + } } llvm_unreachable("Invalid Type::Class!"); diff --git a/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp b/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp index e7fee03..4622a75 100644 --- a/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp @@ -1055,6 +1055,7 @@ void ASTDumper::VisitTypedefDecl(const TypedefDecl *D) { dumpType(D->getUnderlyingType()); if (D->isModulePrivate()) OS << " __module_private__"; + dumpTypeAsChild(D->getUnderlyingType()); } void ASTDumper::VisitEnumDecl(const EnumDecl *D) { @@ -1226,6 +1227,7 @@ void ASTDumper::VisitNamespaceAliasDecl(const NamespaceAliasDecl *D) { void ASTDumper::VisitTypeAliasDecl(const TypeAliasDecl *D) { dumpName(D); dumpType(D->getUnderlyingType()); + dumpTypeAsChild(D->getUnderlyingType()); } void ASTDumper::VisitTypeAliasTemplateDecl(const TypeAliasTemplateDecl *D) { @@ -1419,6 +1421,8 @@ void ASTDumper::VisitUnresolvedUsingValueDecl(const UnresolvedUsingValueDecl *D) void ASTDumper::VisitUsingShadowDecl(const UsingShadowDecl *D) { OS << ' '; dumpBareDeclRef(D->getTargetDecl()); + if (auto *TD = dyn_cast<TypeDecl>(D->getUnderlyingDecl())) + dumpTypeAsChild(TD->getTypeForDecl()); } void ASTDumper::VisitLinkageSpecDecl(const LinkageSpecDecl *D) { diff --git a/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp b/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp index 359db1b..916f108 100644 ---
a/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp @@ -878,6 +878,14 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, break; } + case Type::Pipe: { + if (!IsStructurallyEquivalent(Context, + cast<PipeType>(T1)->getElementType(), + cast<PipeType>(T2)->getElementType())) + return false; + break; + } + } // end switch return true; diff --git a/contrib/llvm/tools/clang/lib/AST/Decl.cpp b/contrib/llvm/tools/clang/lib/AST/Decl.cpp index 42bebc5..427ca5e 100644 --- a/contrib/llvm/tools/clang/lib/AST/Decl.cpp +++ b/contrib/llvm/tools/clang/lib/AST/Decl.cpp @@ -1184,7 +1184,7 @@ static LinkageInfo getLVForLocalDecl(const NamedDecl *D, return LinkageInfo::none(); const Decl *OuterD = getOutermostFuncOrBlockContext(D); - if (!OuterD) + if (!OuterD || OuterD->isInvalidDecl()) return LinkageInfo::none(); LinkageInfo LV; @@ -4024,16 +4024,26 @@ EnumConstantDecl::CreateDeserialized(ASTContext &C, unsigned ID) { void IndirectFieldDecl::anchor() { } +IndirectFieldDecl::IndirectFieldDecl(ASTContext &C, DeclContext *DC, + SourceLocation L, DeclarationName N, + QualType T, NamedDecl **CH, unsigned CHS) + : ValueDecl(IndirectField, DC, L, N, T), Chaining(CH), ChainingSize(CHS) { + // In C++, indirect field declarations conflict with tag declarations in the + // same scope, so add them to IDNS_Tag so that tag redeclaration finds them. + if (C.getLangOpts().CPlusPlus) + IdentifierNamespace |= IDNS_Tag; +} + IndirectFieldDecl * IndirectFieldDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L, IdentifierInfo *Id, QualType T, NamedDecl **CH, unsigned CHS) { - return new (C, DC) IndirectFieldDecl(DC, L, Id, T, CH, CHS); + return new (C, DC) IndirectFieldDecl(C, DC, L, Id, T, CH, CHS); } IndirectFieldDecl *IndirectFieldDecl::CreateDeserialized(ASTContext &C, unsigned ID) { - return new (C, ID) IndirectFieldDecl(nullptr, SourceLocation(), + return new (C, ID) IndirectFieldDecl(C, nullptr, SourceLocation(), DeclarationName(), QualType(), nullptr, 0); } diff --git a/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp b/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp index 16394e8..72587e3 100644 --- a/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp +++ b/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp @@ -569,7 +569,6 @@ unsigned Decl::getIdentifierNamespaceForKind(Kind DeclKind) { case Var: case ImplicitParam: case ParmVar: - case NonTypeTemplateParm: case ObjCMethod: case ObjCProperty: case MSProperty: @@ -579,6 +578,12 @@ unsigned Decl::getIdentifierNamespaceForKind(Kind DeclKind) { case IndirectField: return IDNS_Ordinary | IDNS_Member; + case NonTypeTemplateParm: + // Non-type template parameters are not found by lookups that ignore + // non-types, but they are found by redeclaration lookups for tag types, + // so we include them in the tag namespace. 
+ return IDNS_Ordinary | IDNS_Tag; + case ObjCCompatibleAlias: case ObjCInterface: return IDNS_Ordinary | IDNS_Type; diff --git a/contrib/llvm/tools/clang/lib/AST/Expr.cpp b/contrib/llvm/tools/clang/lib/AST/Expr.cpp index f9757b2..52f34df 100644 --- a/contrib/llvm/tools/clang/lib/AST/Expr.cpp +++ b/contrib/llvm/tools/clang/lib/AST/Expr.cpp @@ -1553,6 +1553,7 @@ bool CastExpr::CastConsistency() const { case CK_ToVoid: case CK_VectorSplat: case CK_IntegralCast: + case CK_BooleanToSignedIntegral: case CK_IntegralToFloating: case CK_FloatingToIntegral: case CK_FloatingCast: @@ -1646,6 +1647,8 @@ const char *CastExpr::getCastKindName() const { return "VectorSplat"; case CK_IntegralCast: return "IntegralCast"; + case CK_BooleanToSignedIntegral: + return "BooleanToSignedIntegral"; case CK_IntegralToBoolean: return "IntegralToBoolean"; case CK_IntegralToFloating: diff --git a/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp b/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp index d35efcb..ea98334 100644 --- a/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp @@ -763,14 +763,6 @@ const IdentifierInfo *UserDefinedLiteral::getUDSuffix() const { return cast<FunctionDecl>(getCalleeDecl())->getLiteralIdentifier(); } -CXXDefaultArgExpr * -CXXDefaultArgExpr::Create(const ASTContext &C, SourceLocation Loc, - ParmVarDecl *Param, Expr *SubExpr) { - void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(1)); - return new (Mem) CXXDefaultArgExpr(CXXDefaultArgExprClass, Loc, Param, - SubExpr); -} - CXXDefaultInitExpr::CXXDefaultInitExpr(const ASTContext &C, SourceLocation Loc, FieldDecl *Field, QualType T) : Expr(CXXDefaultInitExprClass, T.getNonLValueExprType(C), diff --git a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp index c4c4398..fa652ba 100644 --- a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp @@ -7781,12 +7781,16 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_PointerToBoolean: case CK_IntegralToBoolean: case CK_FloatingToBoolean: + case CK_BooleanToSignedIntegral: case CK_FloatingComplexToBoolean: case CK_IntegralComplexToBoolean: { bool BoolResult; if (!EvaluateAsBooleanCondition(SubExpr, BoolResult, Info)) return false; - return Success(BoolResult, E); + uint64_t IntResult = BoolResult; + if (BoolResult && E->getCastKind() == CK_BooleanToSignedIntegral) + IntResult = (uint64_t)-1; + return Success(IntResult, E); } case CK_IntegralCast: { @@ -8223,6 +8227,7 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_ToVoid: case CK_VectorSplat: case CK_IntegralCast: + case CK_BooleanToSignedIntegral: case CK_IntegralToBoolean: case CK_IntegralToFloating: case CK_FloatingToIntegral: diff --git a/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp b/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp index 8018188..3f6b682 100644 --- a/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp @@ -1509,6 +1509,7 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty, case Type::ObjCInterface: case Type::ObjCObjectPointer: case Type::Atomic: + case Type::Pipe: llvm_unreachable("type is illegal as a nested name specifier"); case Type::SubstTemplateTypeParmPack: @@ -2682,6 +2683,13 @@ void CXXNameMangler::mangleType(const AtomicType *T) { mangleType(T->getValueType()); } +void CXXNameMangler::mangleType(const PipeType *T) { + // Pipe type mangling rules are described 
in SPIR 2.0 specification + // A.1 Data types and A.3 Summary of changes + // <type> ::= 8ocl_pipe + Out << "8ocl_pipe"; +} + void CXXNameMangler::mangleIntegerLiteral(QualType T, const llvm::APSInt &Value) { // <expr-primary> ::= L <type> <value number> E # integer literal diff --git a/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp b/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp index d45232b..4a45f9e 100644 --- a/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp +++ b/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp @@ -2428,6 +2428,15 @@ void MicrosoftCXXNameMangler::mangleType(const AtomicType *T, Qualifiers, mangleArtificalTagType(TTK_Struct, TemplateMangling, {"__clang"}); } +void MicrosoftCXXNameMangler::mangleType(const PipeType *T, Qualifiers, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this OpenCL pipe type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; +} + void MicrosoftMangleContextImpl::mangleCXXName(const NamedDecl *D, raw_ostream &Out) { assert((isa<FunctionDecl>(D) || isa<VarDecl>(D)) && diff --git a/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp b/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp index bc3c2a8..bc5ae0f 100644 --- a/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp +++ b/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp @@ -1552,7 +1552,8 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { FieldAlign = 1; // But, if there's an 'aligned' attribute on the field, honor that. - if (unsigned ExplicitFieldAlign = D->getMaxAlignment()) { + unsigned ExplicitFieldAlign = D->getMaxAlignment(); + if (ExplicitFieldAlign) { FieldAlign = std::max(FieldAlign, ExplicitFieldAlign); UnpackedFieldAlign = std::max(UnpackedFieldAlign, ExplicitFieldAlign); } @@ -1601,6 +1602,10 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { (AllowPadding && (FieldOffset & (FieldAlign-1)) + FieldSize > TypeSize)) { FieldOffset = llvm::RoundUpToAlignment(FieldOffset, FieldAlign); + } else if (ExplicitFieldAlign) { + // TODO: figure it out what needs to be done on targets that don't honor + // bit-field type alignment like ARM APCS ABI. + FieldOffset = llvm::RoundUpToAlignment(FieldOffset, ExplicitFieldAlign); } // Repeat the computation for diagnostic purposes. @@ -1609,6 +1614,9 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { (UnpackedFieldOffset & (UnpackedFieldAlign-1)) + FieldSize > TypeSize)) UnpackedFieldOffset = llvm::RoundUpToAlignment(UnpackedFieldOffset, UnpackedFieldAlign); + else if (ExplicitFieldAlign) + UnpackedFieldOffset = llvm::RoundUpToAlignment(UnpackedFieldOffset, + ExplicitFieldAlign); } // If we're using external layout, give the external layout a chance diff --git a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp index e55b2fc..69f52f5 100644 --- a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp +++ b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp @@ -1165,6 +1165,7 @@ void StmtPrinter::VisitCharacterLiteral(CharacterLiteral *Node) { switch (Node->getKind()) { case CharacterLiteral::Ascii: break; // no prefix. 
case CharacterLiteral::Wide: OS << 'L'; break; + case CharacterLiteral::UTF8: OS << "u8"; break; case CharacterLiteral::UTF16: OS << 'u'; break; case CharacterLiteral::UTF32: OS << 'U'; break; } diff --git a/contrib/llvm/tools/clang/lib/AST/Type.cpp b/contrib/llvm/tools/clang/lib/AST/Type.cpp index 7dd38cb..b467dac 100644 --- a/contrib/llvm/tools/clang/lib/AST/Type.cpp +++ b/contrib/llvm/tools/clang/lib/AST/Type.cpp @@ -2614,7 +2614,7 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const { case OCLQueue: return "queue_t"; case OCLNDRange: - return "event_t"; + return "ndrange_t"; case OCLReserveID: return "reserve_id_t"; case OMPArraySection: @@ -3361,6 +3361,8 @@ static CachedProperties computeCachedProperties(const Type *T) { return Cache::get(cast<ObjCObjectPointerType>(T)->getPointeeType()); case Type::Atomic: return Cache::get(cast<AtomicType>(T)->getValueType()); + case Type::Pipe: + return Cache::get(cast<PipeType>(T)->getElementType()); } llvm_unreachable("unhandled type class"); @@ -3443,6 +3445,8 @@ static LinkageInfo computeLinkageInfo(const Type *T) { return computeLinkageInfo(cast<ObjCObjectPointerType>(T)->getPointeeType()); case Type::Atomic: return computeLinkageInfo(cast<AtomicType>(T)->getValueType()); + case Type::Pipe: + return computeLinkageInfo(cast<PipeType>(T)->getElementType()); } llvm_unreachable("unhandled type class"); @@ -3601,6 +3605,7 @@ bool Type::canHaveNullability() const { case Type::ObjCObject: case Type::ObjCInterface: case Type::Atomic: + case Type::Pipe: return false; } llvm_unreachable("bad type kind!"); diff --git a/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp b/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp index 4617e1d..b202523 100644 --- a/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp +++ b/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp @@ -193,6 +193,7 @@ bool TypePrinter::canPrefixQualifiers(const Type *T, case Type::ObjCObject: case Type::ObjCInterface: case Type::Atomic: + case Type::Pipe: CanPrefixQualifiers = true; break; @@ -859,6 +860,15 @@ void TypePrinter::printAtomicBefore(const AtomicType *T, raw_ostream &OS) { } void TypePrinter::printAtomicAfter(const AtomicType *T, raw_ostream &OS) { } +void TypePrinter::printPipeBefore(const PipeType *T, raw_ostream &OS) { + IncludeStrongLifetimeRAII Strong(Policy); + + OS << "pipe"; + spaceBeforePlaceHolder(OS); +} + +void TypePrinter::printPipeAfter(const PipeType *T, raw_ostream &OS) { +} /// Appends the given scope to the end of a string. void TypePrinter::AppendScope(DeclContext *DC, raw_ostream &OS) { if (DC->isTranslationUnit()) return; diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp index 9ce5257..1bc6c51 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp @@ -223,7 +223,24 @@ protected: public: DarwinTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) { - this->TLSSupported = Triple.isMacOSX() && !Triple.isMacOSXVersionLT(10, 7); + // By default, no TLS, and we whitelist permitted architecture/OS + // combinations. + this->TLSSupported = false; + + if (Triple.isMacOSX()) + this->TLSSupported = !Triple.isMacOSXVersionLT(10, 7); + else if (Triple.isiOS()) { + // 64-bit iOS supported it from 8 onwards, 32-bit from 9 onwards. 
+ if (Triple.getArch() == llvm::Triple::x86_64 || + Triple.getArch() == llvm::Triple::aarch64) + this->TLSSupported = !Triple.isOSVersionLT(8); + else if (Triple.getArch() == llvm::Triple::x86 || + Triple.getArch() == llvm::Triple::arm || + Triple.getArch() == llvm::Triple::thumb) + this->TLSSupported = !Triple.isOSVersionLT(9); + } else if (Triple.isWatchOS()) + this->TLSSupported = !Triple.isOSVersionLT(2); + this->MCountName = "\01mcount"; } @@ -7281,7 +7298,7 @@ public: explicit WebAssembly32TargetInfo(const llvm::Triple &T) : WebAssemblyTargetInfo(T) { MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32; - DataLayoutString = "e-p:32:32-i64:64-n32:64-S128"; + DataLayoutString = "e-m:e-p:32:32-i64:64-n32:64-S128"; } protected: @@ -7299,7 +7316,7 @@ public: LongAlign = LongWidth = 64; PointerAlign = PointerWidth = 64; MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; - DataLayoutString = "e-p:64:64-i64:64-n32:64-S128"; + DataLayoutString = "e-m:e-p:64:64-i64:64-n32:64-S128"; } protected: diff --git a/contrib/llvm/tools/clang/lib/Basic/VirtualFileSystem.cpp b/contrib/llvm/tools/clang/lib/Basic/VirtualFileSystem.cpp index cf5a8d6..6977f40 100644 --- a/contrib/llvm/tools/clang/lib/Basic/VirtualFileSystem.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/VirtualFileSystem.cpp @@ -658,6 +658,23 @@ directory_iterator InMemoryFileSystem::dir_begin(const Twine &Dir, EC = make_error_code(llvm::errc::not_a_directory); return directory_iterator(std::make_shared<InMemoryDirIterator>()); } + +std::error_code InMemoryFileSystem::setCurrentWorkingDirectory(const Twine &P) { + SmallString<128> Path; + P.toVector(Path); + + // Fix up relative paths. This just prepends the current working directory. + std::error_code EC = makeAbsolute(Path); + assert(!EC); + (void)EC; + + if (useNormalizedPaths()) + llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true); + + if (!Path.empty()) + WorkingDirectory = Path.str(); + return std::error_code(); +} } } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp index 7032d00..6d746c2 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/FunctionIndexObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/TargetRegistry.h" @@ -54,7 +55,6 @@ class EmitAssemblyHelper { const clang::TargetOptions &TargetOpts; const LangOptions &LangOpts; Module *TheModule; - std::unique_ptr<FunctionInfoIndex> FunctionIndex; Timer CodeGenerationTime; @@ -97,7 +97,7 @@ private: return PerFunctionPasses; } - void CreatePasses(); + void CreatePasses(FunctionInfoIndex *FunctionIndex); /// Generates the TargetMachine. /// Returns Null if it is unable to create the target machine. 
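The Darwin TLS whitelist in the Targets.cpp hunk above compresses into one predicate. The restatement below uses the same llvm::Triple queries as the hunk; it is a condensed sketch for readability, not the code the diff installs:

    #include "llvm/ADT/Triple.h"

    bool darwinSupportsTLS(const llvm::Triple &T) {
      if (T.isMacOSX())
        return !T.isMacOSXVersionLT(10, 7);    // macOS: 10.7 onwards
      if (T.isiOS()) {
        if (T.getArch() == llvm::Triple::x86_64 ||
            T.getArch() == llvm::Triple::aarch64)
          return !T.isOSVersionLT(8);          // 64-bit iOS: 8 onwards
        if (T.getArch() == llvm::Triple::x86 ||
            T.getArch() == llvm::Triple::arm ||
            T.getArch() == llvm::Triple::thumb)
          return !T.isOSVersionLT(9);          // 32-bit iOS: 9 onwards
        return false;
      }
      if (T.isWatchOS())
        return !T.isOSVersionLT(2);            // watchOS: 2 onwards
      return false;                            // everything else: no TLS
    }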
@@ -117,12 +117,11 @@ private: public: EmitAssemblyHelper(DiagnosticsEngine &_Diags, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, - const LangOptions &LOpts, Module *M, - std::unique_ptr<FunctionInfoIndex> Index) + const LangOptions &LOpts, Module *M) : Diags(_Diags), CodeGenOpts(CGOpts), TargetOpts(TOpts), LangOpts(LOpts), - TheModule(M), FunctionIndex(std::move(Index)), - CodeGenerationTime("Code Generation Time"), CodeGenPasses(nullptr), - PerModulePasses(nullptr), PerFunctionPasses(nullptr) {} + TheModule(M), CodeGenerationTime("Code Generation Time"), + CodeGenPasses(nullptr), PerModulePasses(nullptr), + PerFunctionPasses(nullptr) {} ~EmitAssemblyHelper() { delete CodeGenPasses; @@ -278,7 +277,7 @@ static void addSymbolRewriterPass(const CodeGenOptions &Opts, MPM->add(createRewriteSymbolsPass(DL)); } -void EmitAssemblyHelper::CreatePasses() { +void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { if (CodeGenOpts.DisableLLVMPasses) return; @@ -332,9 +331,8 @@ void EmitAssemblyHelper::CreatePasses() { // If we are performing a ThinLTO importing compile, invoke the LTO // pipeline and pass down the in-memory function index. - if (!CodeGenOpts.ThinLTOIndexFile.empty()) { - assert(FunctionIndex && "Expected non-empty function index"); - PMBuilder.FunctionIndex = FunctionIndex.get(); + if (FunctionIndex) { + PMBuilder.FunctionIndex = FunctionIndex; PMBuilder.populateLTOPassManager(*MPM); return; } @@ -642,7 +640,28 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, return; if (TM) TheModule->setDataLayout(TM->createDataLayout()); - CreatePasses(); + + // If we are performing a ThinLTO importing compile, load the function + // index into memory and pass it into CreatePasses, which will add it + // to the PassManagerBuilder and invoke LTO passes. 
+ std::unique_ptr<FunctionInfoIndex> FunctionIndex; + if (!CodeGenOpts.ThinLTOIndexFile.empty()) { + ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr = + llvm::getFunctionIndexForFile(CodeGenOpts.ThinLTOIndexFile, + [&](const DiagnosticInfo &DI) { + TheModule->getContext().diagnose(DI); + }); + if (std::error_code EC = IndexOrErr.getError()) { + std::string Error = EC.message(); + errs() << "Error loading index file '" << CodeGenOpts.ThinLTOIndexFile + << "': " << Error << "\n"; + return; + } + FunctionIndex = std::move(IndexOrErr.get()); + assert(FunctionIndex && "Expected non-empty function index"); + } + + CreatePasses(FunctionIndex.get()); switch (Action) { case Backend_EmitNothing: @@ -695,10 +714,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, const clang::TargetOptions &TOpts, const LangOptions &LOpts, StringRef TDesc, Module *M, BackendAction Action, - raw_pwrite_stream *OS, - std::unique_ptr<FunctionInfoIndex> Index) { - EmitAssemblyHelper AsmHelper(Diags, CGOpts, TOpts, LOpts, M, - std::move(Index)); + raw_pwrite_stream *OS) { + EmitAssemblyHelper AsmHelper(Diags, CGOpts, TOpts, LOpts, M); AsmHelper.EmitAssembly(Action, OS); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp index 78e3978..5df8519 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp @@ -2025,6 +2025,11 @@ llvm::DIType *CGDebugInfo::CreateType(const AtomicType *Ty, llvm::DIFile *U) { return getOrCreateType(Ty->getValueType(), U); } +llvm::DIType* CGDebugInfo::CreateType(const PipeType *Ty, + llvm::DIFile *U) { + return getOrCreateType(Ty->getElementType(), U); +} + llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) { const EnumDecl *ED = Ty->getDecl(); @@ -2284,6 +2289,9 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::Atomic: return CreateType(cast<AtomicType>(Ty), Unit); + case Type::Pipe: + return CreateType(cast<PipeType>(Ty), Unit); + case Type::TemplateSpecialization: return CreateType(cast<TemplateSpecializationType>(Ty), Unit); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h index 57d5c80..a68dd33f 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h @@ -168,6 +168,7 @@ class CGDebugInfo { llvm::DIType *CreateType(const RValueReferenceType *Ty, llvm::DIFile *Unit); llvm::DIType *CreateType(const MemberPointerType *Ty, llvm::DIFile *F); llvm::DIType *CreateType(const AtomicType *Ty, llvm::DIFile *F); + llvm::DIType *CreateType(const PipeType *Ty, llvm::DIFile *F); /// Get enumeration type. 
llvm::DIType *CreateEnumType(const EnumType *Ty); llvm::DIType *CreateTypeDefinition(const EnumType *Ty); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp index dabd2b1..507ce3d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp @@ -3365,6 +3365,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_PointerToBoolean: case CK_VectorSplat: case CK_IntegralCast: + case CK_BooleanToSignedIntegral: case CK_IntegralToBoolean: case CK_IntegralToFloating: case CK_FloatingToIntegral: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp index 20838db..a4547a9 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp @@ -721,6 +721,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { case CK_ToVoid: case CK_VectorSplat: case CK_IntegralCast: + case CK_BooleanToSignedIntegral: case CK_IntegralToBoolean: case CK_IntegralToFloating: case CK_FloatingToIntegral: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp index ccdb532..22910d9 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp @@ -462,6 +462,7 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, case CK_ToVoid: case CK_VectorSplat: case CK_IntegralCast: + case CK_BooleanToSignedIntegral: case CK_IntegralToBoolean: case CK_IntegralToFloating: case CK_FloatingToIntegral: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp index 3839ab7..ee049f1 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp @@ -735,6 +735,7 @@ public: case CK_PointerToBoolean: case CK_NullToPointer: case CK_IntegralCast: + case CK_BooleanToSignedIntegral: case CK_IntegralToPointer: case CK_IntegralToBoolean: case CK_IntegralToFloating: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp index 725d96f..268e796 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp @@ -811,14 +811,15 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, // A scalar can be splatted to an extended vector of the same element type if (DstType->isExtVectorType() && !SrcType->isVectorType()) { - // Cast the scalar to element type - QualType EltTy = DstType->getAs<ExtVectorType>()->getElementType(); - llvm::Value *Elt = EmitScalarConversion( - Src, SrcType, EltTy, Loc, CGF.getContext().getLangOpts().OpenCL); + // Sema should add casts to make sure that the source expression's type is + // the same as the vector's element type (sans qualifiers) + assert(DstType->castAs<ExtVectorType>()->getElementType().getTypePtr() == + SrcType.getTypePtr() && + "Splatted expr doesn't match with vector element type?"); // Splat the element across to all elements unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); - return Builder.CreateVectorSplat(NumElements, Elt, "splat"); + return Builder.CreateVectorSplat(NumElements, Src, "splat"); } // Allow bitcast from vector to integer/fp of the same size. 
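The EmitScalarConversion hunk above inverts the old division of labour for vector splats: Sema (via the prepareVectorSplat declaration earlier in this diff) now converts the scalar to the vector's element type up front, and CodeGen merely replicates the converted value, asserting that the types already agree. The companion cast kind CK_BooleanToSignedIntegral maps true to -1 rather than 1, the all-ones lane value that OpenCL-style vector semantics expect. A toy model of the two steps, with stand-in names rather than clang code:

    #include <array>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Step 1, Sema's job: convert the scalar to the element type.
    // CK_BooleanToSignedIntegral turns true into -1, not 1.
    int32_t booleanToSignedIntegral(bool B) { return B ? -1 : 0; }

    // Step 2, CodeGen's job: splat the already-converted element. No further
    // conversion happens here, which is what the new assert enforces.
    template <std::size_t N>
    std::array<int32_t, N> vectorSplat(int32_t Elt) {
      std::array<int32_t, N> V;
      V.fill(Elt);
      return V;
    }

    int main() {
      auto V = vectorSplat<4>(booleanToSignedIntegral(true));
      assert(V[0] == -1 && V[3] == -1); // all lanes -1, as OpenCL requires
    }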
@@ -1541,15 +1542,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } case CK_VectorSplat: { llvm::Type *DstTy = ConvertType(DestTy); - // Need an IgnoreImpCasts here as by default a boolean will be promoted to - // an int, which will not perform the sign extension, so if we know we are - // going to cast to a vector we have to strip the implicit cast off. - Value *Elt = Visit(const_cast<Expr*>(E->IgnoreImpCasts())); - Elt = EmitScalarConversion(Elt, E->IgnoreImpCasts()->getType(), - DestTy->getAs<VectorType>()->getElementType(), - CE->getExprLoc(), - CGF.getContext().getLangOpts().OpenCL); - + Value *Elt = Visit(const_cast<Expr*>(E)); // Splat the element across to all elements unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); return Builder.CreateVectorSplat(NumElements, Elt, "splat"); @@ -1561,6 +1554,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_FloatingCast: return EmitScalarConversion(Visit(E), E->getType(), DestTy, CE->getExprLoc()); + case CK_BooleanToSignedIntegral: + return EmitScalarConversion(Visit(E), E->getType(), DestTy, + CE->getExprLoc(), + /*TreatBooleanAsSigned=*/true); case CK_IntegralToBoolean: return EmitIntToBoolConversion(Visit(E)); case CK_PointerToBoolean: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp index 8af39ce..6866789 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp @@ -99,3 +99,14 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { llvm::StructType::create(Ctx, "opencl.reserve_id_t"), 0); } } + +llvm::Type *CGOpenCLRuntime::getPipeType() { + if (!PipeTy){ + uint32_t PipeAddrSpc = + CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); + PipeTy = llvm::PointerType::get(llvm::StructType::create( + CGM.getLLVMContext(), "opencl.pipe_t"), PipeAddrSpc); + } + + return PipeTy; +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.h index 0c50b92..f1a7a31 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.h @@ -32,9 +32,10 @@ class CodeGenModule; class CGOpenCLRuntime { protected: CodeGenModule &CGM; + llvm::Type *PipeTy; public: - CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM) {} + CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM), PipeTy(nullptr) {} virtual ~CGOpenCLRuntime(); /// Emit the IR required for a work-group-local variable declaration, and add @@ -44,6 +45,8 @@ public: const VarDecl &D); virtual llvm::Type *convertOpenCLSpecificType(const Type *T); + + virtual llvm::Type *getPipeType(); }; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 6d4fc9f..3b97ba2 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -84,7 +84,7 @@ public: protected: CGOpenMPRegionKind RegionKind; - const RegionCodeGenTy &CodeGen; + RegionCodeGenTy CodeGen; OpenMPDirectiveKind Kind; bool HasCancel; }; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h index 6b04fbe..b325637 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -335,7 +335,7 @@ private: public: /// 
\brief Kind of a given entry. Currently, only target regions are /// supported. - enum OffloadingEntryInfoKinds { + enum OffloadingEntryInfoKinds : unsigned { // Entry is a target region. OFFLOAD_ENTRY_INFO_TARGET_REGION = 0, // Invalid entry info. @@ -955,7 +955,7 @@ public: /// \brief Emit the target regions enclosed in \a GD function definition or /// the function itself in case it is a valid device function. Returns true if /// \a GD was dealt with successfully. - /// \param FD Function to scan. + /// \param GD Function to scan. virtual bool emitTargetFunctions(GlobalDecl GD); /// \brief Emit the global variable if it is a valid device global variable. diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp index abef543..0a670ab 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp @@ -26,12 +26,10 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" -#include "llvm/IR/FunctionInfo.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" -#include "llvm/Object/FunctionIndexObjectFile.h" #include "llvm/Pass.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" @@ -781,43 +779,11 @@ void CodeGenAction::ExecuteAction() { TheModule->setTargetTriple(TargetOpts.Triple); } - auto DiagHandler = [&](const DiagnosticInfo &DI) { - TheModule->getContext().diagnose(DI); - }; - - // If we are performing ThinLTO importing compilation (indicated by - // a non-empty index file option), then we need promote to global scope - // and rename any local values that are potentially exported to other - // modules. Do this early so that the rest of the compilation sees the - // promoted symbols. - std::unique_ptr<FunctionInfoIndex> Index; - if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty()) { - ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr = - llvm::getFunctionIndexForFile(CI.getCodeGenOpts().ThinLTOIndexFile, - DiagHandler); - if (std::error_code EC = IndexOrErr.getError()) { - std::string Error = EC.message(); - errs() << "Error loading index file '" - << CI.getCodeGenOpts().ThinLTOIndexFile << "': " << Error - << "\n"; - return; - } - Index = std::move(IndexOrErr.get()); - assert(Index); - // Currently this requires creating a new Module object. 
- std::unique_ptr<llvm::Module> RenamedModule = - renameModuleForThinLTO(std::move(TheModule), Index.get()); - if (!RenamedModule) - return; - - TheModule = std::move(RenamedModule); - } - LLVMContext &Ctx = TheModule->getContext(); Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler); EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(), TargetOpts, CI.getLangOpts(), CI.getTarget().getDataLayoutString(), - TheModule.get(), BA, OS, std::move(Index)); + TheModule.get(), BA, OS); return; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp index 048a043..e38ff0a 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp @@ -79,7 +79,7 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) if (CGM.getCodeGenOpts().ReciprocalMath) { FMF.setAllowReciprocal(); } - Builder.SetFastMathFlags(FMF); + Builder.setFastMathFlags(FMF); } CodeGenFunction::~CodeGenFunction() { @@ -195,6 +195,7 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) { case Type::FunctionNoProto: case Type::Enum: case Type::ObjCObjectPointer: + case Type::Pipe: return TEK_Scalar; // Complexes. @@ -511,7 +512,8 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, typeQuals += typeQuals.empty() ? "volatile" : " volatile"; } else { uint32_t AddrSpc = 0; - if (ty->isImageType()) + bool isPipe = ty->isPipeType(); + if (ty->isImageType() || isPipe) AddrSpc = CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); @@ -519,7 +521,11 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, llvm::ConstantAsMetadata::get(Builder.getInt32(AddrSpc))); // Get argument type name. - std::string typeName = ty.getUnqualifiedType().getAsString(Policy); + std::string typeName; + if (isPipe) + typeName = cast<PipeType>(ty)->getElementType().getAsString(Policy); + else + typeName = ty.getUnqualifiedType().getAsString(Policy); // Turn "unsigned type" to "utype" std::string::size_type pos = typeName.find("unsigned"); @@ -528,7 +534,12 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, argTypeNames.push_back(llvm::MDString::get(Context, typeName)); - std::string baseTypeName = + std::string baseTypeName; + if (isPipe) + baseTypeName = + cast<PipeType>(ty)->getElementType().getCanonicalType().getAsString(Policy); + else + baseTypeName = ty.getUnqualifiedType().getCanonicalType().getAsString(Policy); // Turn "unsigned type" to "utype" @@ -543,12 +554,16 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, typeQuals = "const"; if (ty.isVolatileQualified()) typeQuals += typeQuals.empty() ? 
"volatile" : " volatile"; + if (isPipe) + typeQuals = "pipe"; } argTypeQuals.push_back(llvm::MDString::get(Context, typeQuals)); - // Get image access qualifier: - if (ty->isImageType()) { + // Get image and pipe access qualifier: + // FIXME: now image and pipe share the same access qualifier maybe we can + // refine it to OpenCL access qualifier and also handle write_read + if (ty->isImageType()|| ty->isPipeType()) { const OpenCLImageAccessAttr *A = parm->getAttr<OpenCLImageAccessAttr>(); if (A && A->isWriteOnly()) accessQuals.push_back(llvm::MDString::get(Context, "write_only")); @@ -1727,6 +1742,10 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { case Type::Atomic: type = cast<AtomicType>(ty)->getValueType(); break; + + case Type::Pipe: + type = cast<PipeType>(ty)->getElementType(); + break; } } while (type->isVariablyModifiedType()); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp index 536c55a..97b1662 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp @@ -615,7 +615,20 @@ void CodeGenModule::setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const { } StringRef CodeGenModule::getMangledName(GlobalDecl GD) { - StringRef &FoundStr = MangledDeclNames[GD.getCanonicalDecl()]; + GlobalDecl CanonicalGD = GD.getCanonicalDecl(); + + // Some ABIs don't have constructor variants. Make sure that base and + // complete constructors get mangled the same. + if (const auto *CD = dyn_cast<CXXConstructorDecl>(CanonicalGD.getDecl())) { + if (!getTarget().getCXXABI().hasConstructorVariants()) { + CXXCtorType OrigCtorType = GD.getCtorType(); + assert(OrigCtorType == Ctor_Base || OrigCtorType == Ctor_Complete); + if (OrigCtorType == Ctor_Base) + CanonicalGD = GlobalDecl(CD, Ctor_Complete); + } + } + + StringRef &FoundStr = MangledDeclNames[CanonicalGD]; if (!FoundStr.empty()) return FoundStr; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp index 5ae861e..2c0d93b 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp @@ -699,7 +699,7 @@ CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name, setFuncName(Name, Linkage); CGM.getCoverageMapping()->addFunctionMappingRecord( - FuncNameVar, FuncName, FunctionHash, CoverageMapping); + FuncNameVar, FuncName, FunctionHash, CoverageMapping, false); } void CodeGenPGO::computeRegionCounts(const Decl *D) { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp index fcda053..09d9bf1 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp @@ -628,6 +628,10 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { } break; } + case Type::Pipe: { + ResultType = CGM.getOpenCLRuntime().getPipeType(); + break; + } } assert(ResultType && "Didn't convert a type?"); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp index 1d4d709..03e22cd 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -910,11 +910,11 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName, } void CoverageMappingModuleGen::addFunctionMappingRecord( - llvm::GlobalVariable 
*NamePtr, StringRef NameValue, - uint64_t FuncHash, const std::string &CoverageMapping) { + llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash, + const std::string &CoverageMapping, bool isUsed) { llvm::LLVMContext &Ctx = CGM.getLLVMContext(); if (!FunctionRecordTy) { - #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) LLVMType, +#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) LLVMType, llvm::Type *FunctionRecordTypes[] = { #include "llvm/ProfileData/InstrProfData.inc" }; @@ -929,6 +929,9 @@ void CoverageMappingModuleGen::addFunctionMappingRecord( }; FunctionRecords.push_back(llvm::ConstantStruct::get( FunctionRecordTy, makeArrayRef(FunctionRecordVals))); + if (!isUsed) + FunctionNames.push_back( + llvm::ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx))); CoverageMappings += CoverageMapping; if (CGM.getCodeGenOpts().DumpCoverageMapping) { @@ -1023,6 +1026,17 @@ void CoverageMappingModuleGen::emit() { // Make sure the data doesn't get deleted. CGM.addUsedGlobal(CovData); + // Create the deferred function records array + if (!FunctionNames.empty()) { + auto NamesArrTy = llvm::ArrayType::get(llvm::Type::getInt8PtrTy(Ctx), + FunctionNames.size()); + auto NamesArrVal = llvm::ConstantArray::get(NamesArrTy, FunctionNames); + // This variable will *NOT* be emitted to the object file. It is used + // to pass the list of names referenced to codegen. + new llvm::GlobalVariable(CGM.getModule(), NamesArrTy, true, + llvm::GlobalValue::InternalLinkage, NamesArrVal, + llvm::getCoverageNamesVarName()); + } } unsigned CoverageMappingModuleGen::getFileID(const FileEntry *File) { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h index 0d1bf6d..9ae2bcf 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h @@ -54,6 +54,7 @@ class CoverageMappingModuleGen { CoverageSourceInfo &SourceInfo; llvm::SmallDenseMap<const FileEntry *, unsigned, 8> FileEntries; std::vector<llvm::Constant *> FunctionRecords; + std::vector<llvm::Constant *> FunctionNames; llvm::StructType *FunctionRecordTy; std::string CoverageMappings; @@ -70,7 +71,8 @@ public: void addFunctionMappingRecord(llvm::GlobalVariable *FunctionName, StringRef FunctionNameValue, uint64_t FunctionHash, - const std::string &CoverageMapping); + const std::string &CoverageMapping, + bool isUsed = true); /// \brief Emit the coverage mapping data for a translation unit. void emit(); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp index 0c4008f..e02c8dc 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2715,6 +2715,9 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { case Type::Auto: llvm_unreachable("Undeduced auto type shouldn't get here"); + case Type::Pipe: + llvm_unreachable("Pipe types shouldn't get here"); + case Type::Builtin: // GCC treats vector and complex types as fundamental types. 
case Type::Vector: @@ -2939,6 +2942,9 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force) { case Type::Auto: llvm_unreachable("Undeduced auto type shouldn't get here"); + case Type::Pipe: + llvm_unreachable("Pipe type shouldn't get here"); + case Type::ConstantArray: case Type::IncompleteArray: case Type::VariableArray: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index b397eb3..f385e53 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -59,8 +59,10 @@ class PCHContainerGenerator : public ASTConsumer { struct DebugTypeVisitor : public RecursiveASTVisitor<DebugTypeVisitor> { clang::CodeGen::CGDebugInfo &DI; ASTContext &Ctx; - DebugTypeVisitor(clang::CodeGen::CGDebugInfo &DI, ASTContext &Ctx) - : DI(DI), Ctx(Ctx) {} + bool SkipTagDecls; + DebugTypeVisitor(clang::CodeGen::CGDebugInfo &DI, ASTContext &Ctx, + bool SkipTagDecls) + : DI(DI), Ctx(Ctx), SkipTagDecls(SkipTagDecls) {} /// Determine whether this type can be represented in DWARF. static bool CanRepresent(const Type *Ty) { @@ -75,6 +77,12 @@ class PCHContainerGenerator : public ASTConsumer { } bool VisitTypeDecl(TypeDecl *D) { + // TagDecls may be deferred until after all decls have been merged and we + // know the complete type. Pure forward declarations will be skipped, but + // they don't need to be emitted into the module anyway. + if (SkipTagDecls && isa<TagDecl>(D)) + return true; + QualType QualTy = Ctx.getTypeDeclType(D); if (!QualTy.isNull() && CanRepresent(QualTy.getTypePtr())) DI.getOrCreateStandaloneType(QualTy, D->getLocation()); @@ -165,7 +173,7 @@ public: // Collect debug info for all decls in this group. for (auto *I : D) if (!I->isFromASTFile()) { - DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx); + DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx, true); DTV.TraverseDecl(I); } return true; @@ -179,6 +187,11 @@ public: if (Diags.hasErrorOccurred()) return; + if (D->isFromASTFile()) + return; + + DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx, false); + DTV.TraverseDecl(D); Builder->UpdateCompletedType(D); } diff --git a/contrib/llvm/tools/clang/lib/Driver/Action.cpp b/contrib/llvm/tools/clang/lib/Driver/Action.cpp index 49dccd2..e9490e9 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Action.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Action.cpp @@ -8,17 +8,14 @@ //===----------------------------------------------------------------------===// #include "clang/Driver/Action.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Regex.h" #include <cassert> using namespace clang::driver; using namespace llvm::opt; -Action::~Action() { - if (OwnsInputs) { - for (iterator it = begin(), ie = end(); it != ie; ++it) - delete *it; - } -} +Action::~Action() {} const char *Action::getClassName(ActionClass AC) { switch (AC) { @@ -51,33 +48,53 @@ InputAction::InputAction(const Arg &_Input, types::ID _Type) void BindArchAction::anchor() {} -BindArchAction::BindArchAction(std::unique_ptr<Action> Input, - const char *_ArchName) - : Action(BindArchClass, std::move(Input)), ArchName(_ArchName) {} +BindArchAction::BindArchAction(Action *Input, const char *_ArchName) + : Action(BindArchClass, Input), ArchName(_ArchName) {} + +// Converts CUDA GPU architecture, e.g. 
"sm_21", to its corresponding virtual +// compute arch, e.g. "compute_20". Returns null if the input arch is null or +// doesn't match an existing arch. +static const char* GpuArchToComputeName(const char *ArchName) { + if (!ArchName) + return nullptr; + return llvm::StringSwitch<const char *>(ArchName) + .Cases("sm_20", "sm_21", "compute_20") + .Case("sm_30", "compute_30") + .Case("sm_32", "compute_32") + .Case("sm_35", "compute_35") + .Case("sm_37", "compute_37") + .Case("sm_50", "compute_50") + .Case("sm_52", "compute_52") + .Case("sm_53", "compute_53") + .Default(nullptr); +} void CudaDeviceAction::anchor() {} -CudaDeviceAction::CudaDeviceAction(std::unique_ptr<Action> Input, - const char *ArchName, bool AtTopLevel) - : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName), - AtTopLevel(AtTopLevel) {} - -void CudaHostAction::anchor() {} +CudaDeviceAction::CudaDeviceAction(Action *Input, const char *ArchName, + bool AtTopLevel) + : Action(CudaDeviceClass, Input), GpuArchName(ArchName), + AtTopLevel(AtTopLevel) { + assert(IsValidGpuArchName(GpuArchName)); +} -CudaHostAction::CudaHostAction(std::unique_ptr<Action> Input, - const ActionList &DeviceActions) - : Action(CudaHostClass, std::move(Input)), DeviceActions(DeviceActions) {} +const char *CudaDeviceAction::getComputeArchName() const { + return GpuArchToComputeName(GpuArchName); +} -CudaHostAction::~CudaHostAction() { - for (auto &DA : DeviceActions) - delete DA; +bool CudaDeviceAction::IsValidGpuArchName(llvm::StringRef ArchName) { + return GpuArchToComputeName(ArchName.data()) != nullptr; } +void CudaHostAction::anchor() {} + +CudaHostAction::CudaHostAction(Action *Input, const ActionList &DeviceActions) + : Action(CudaHostClass, Input), DeviceActions(DeviceActions) {} + void JobAction::anchor() {} -JobAction::JobAction(ActionClass Kind, std::unique_ptr<Action> Input, - types::ID Type) - : Action(Kind, std::move(Input), Type) {} +JobAction::JobAction(ActionClass Kind, Action *Input, types::ID Type) + : Action(Kind, Input, Type) {} JobAction::JobAction(ActionClass Kind, const ActionList &Inputs, types::ID Type) : Action(Kind, Inputs, Type) { @@ -85,45 +102,38 @@ JobAction::JobAction(ActionClass Kind, const ActionList &Inputs, types::ID Type) void PreprocessJobAction::anchor() {} -PreprocessJobAction::PreprocessJobAction(std::unique_ptr<Action> Input, - types::ID OutputType) - : JobAction(PreprocessJobClass, std::move(Input), OutputType) {} +PreprocessJobAction::PreprocessJobAction(Action *Input, types::ID OutputType) + : JobAction(PreprocessJobClass, Input, OutputType) {} void PrecompileJobAction::anchor() {} -PrecompileJobAction::PrecompileJobAction(std::unique_ptr<Action> Input, - types::ID OutputType) - : JobAction(PrecompileJobClass, std::move(Input), OutputType) {} +PrecompileJobAction::PrecompileJobAction(Action *Input, types::ID OutputType) + : JobAction(PrecompileJobClass, Input, OutputType) {} void AnalyzeJobAction::anchor() {} -AnalyzeJobAction::AnalyzeJobAction(std::unique_ptr<Action> Input, - types::ID OutputType) - : JobAction(AnalyzeJobClass, std::move(Input), OutputType) {} +AnalyzeJobAction::AnalyzeJobAction(Action *Input, types::ID OutputType) + : JobAction(AnalyzeJobClass, Input, OutputType) {} void MigrateJobAction::anchor() {} -MigrateJobAction::MigrateJobAction(std::unique_ptr<Action> Input, - types::ID OutputType) - : JobAction(MigrateJobClass, std::move(Input), OutputType) {} +MigrateJobAction::MigrateJobAction(Action *Input, types::ID OutputType) + : JobAction(MigrateJobClass, Input, OutputType) {} 
void CompileJobAction::anchor() {} -CompileJobAction::CompileJobAction(std::unique_ptr<Action> Input, - types::ID OutputType) - : JobAction(CompileJobClass, std::move(Input), OutputType) {} +CompileJobAction::CompileJobAction(Action *Input, types::ID OutputType) + : JobAction(CompileJobClass, Input, OutputType) {} void BackendJobAction::anchor() {} -BackendJobAction::BackendJobAction(std::unique_ptr<Action> Input, - types::ID OutputType) - : JobAction(BackendJobClass, std::move(Input), OutputType) {} +BackendJobAction::BackendJobAction(Action *Input, types::ID OutputType) + : JobAction(BackendJobClass, Input, OutputType) {} void AssembleJobAction::anchor() {} -AssembleJobAction::AssembleJobAction(std::unique_ptr<Action> Input, - types::ID OutputType) - : JobAction(AssembleJobClass, std::move(Input), OutputType) {} +AssembleJobAction::AssembleJobAction(Action *Input, types::ID OutputType) + : JobAction(AssembleJobClass, Input, OutputType) {} void LinkJobAction::anchor() {} @@ -145,21 +155,20 @@ DsymutilJobAction::DsymutilJobAction(ActionList &Inputs, types::ID Type) void VerifyJobAction::anchor() {} -VerifyJobAction::VerifyJobAction(ActionClass Kind, - std::unique_ptr<Action> Input, types::ID Type) - : JobAction(Kind, std::move(Input), Type) { +VerifyJobAction::VerifyJobAction(ActionClass Kind, Action *Input, + types::ID Type) + : JobAction(Kind, Input, Type) { assert((Kind == VerifyDebugInfoJobClass || Kind == VerifyPCHJobClass) && "ActionClass is not a valid VerifyJobAction"); } void VerifyDebugInfoJobAction::anchor() {} -VerifyDebugInfoJobAction::VerifyDebugInfoJobAction( - std::unique_ptr<Action> Input, types::ID Type) - : VerifyJobAction(VerifyDebugInfoJobClass, std::move(Input), Type) {} +VerifyDebugInfoJobAction::VerifyDebugInfoJobAction(Action *Input, + types::ID Type) + : VerifyJobAction(VerifyDebugInfoJobClass, Input, Type) {} void VerifyPCHJobAction::anchor() {} -VerifyPCHJobAction::VerifyPCHJobAction(std::unique_ptr<Action> Input, - types::ID Type) - : VerifyJobAction(VerifyPCHJobClass, std::move(Input), Type) {} +VerifyPCHJobAction::VerifyPCHJobAction(Action *Input, types::ID Type) + : VerifyJobAction(VerifyPCHJobClass, Input, Type) {} diff --git a/contrib/llvm/tools/clang/lib/Driver/Compilation.cpp b/contrib/llvm/tools/clang/lib/Driver/Compilation.cpp index e4af2a6..1c2eecd 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Compilation.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Compilation.cpp @@ -40,11 +40,6 @@ Compilation::~Compilation() { if (it->second != TranslatedArgs) delete it->second; - // Free the actions, if built. - for (ActionList::iterator it = Actions.begin(), ie = Actions.end(); - it != ie; ++it) - delete *it; - // Free redirections of stdout/stderr. if (Redirects) { delete Redirects[1]; @@ -208,7 +203,8 @@ void Compilation::initCompilationForDiagnostics() { ForDiagnostics = true; // Free actions and jobs. - DeleteContainerPointers(Actions); + Actions.clear(); + AllActions.clear(); Jobs.clear(); // Clear temporary/results file lists. 
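All of the deleted destructor loops above (Action, CudaHostAction, Compilation) trace back to one ownership change: actions are now created through Compilation::MakeAction<> and owned centrally, so every other Action* is a plain non-owning view. A hedged sketch of that pattern; the real MakeAction in this tree should be analogous but is not reproduced here:

    #include <memory>
    #include <utility>
    #include <vector>

    struct Action {
      virtual ~Action() = default;
    };

    class Compilation {
      // Single owner for every Action ever built; consumers and parent
      // actions never call delete, so no OwnsInputs bookkeeping is needed.
      std::vector<std::unique_ptr<Action>> AllActions;

    public:
      template <typename T, typename... ArgTys>
      T *MakeAction(ArgTys &&... Args) {
        T *Raw = new T(std::forward<ArgTys>(Args)...);
        AllActions.emplace_back(Raw);
        return Raw;
      }

      // Mirrors initCompilationForDiagnostics above: dropping every action
      // is now a plain clear() instead of a manual delete loop.
      void clearActions() { AllActions.clear(); }
    };

This also makes it safe for several consumers to reference the same Action node, which the old per-consumer deletion scheme could not express without setOwnsInputs(false) hacks.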
diff --git a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp index 85bbcb4..1e0a48d 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp @@ -1049,19 +1049,15 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC, << types::getTypeName(Act->getType()); ActionList Inputs; - for (unsigned i = 0, e = Archs.size(); i != e; ++i) { - Inputs.push_back( - new BindArchAction(std::unique_ptr<Action>(Act), Archs[i])); - if (i != 0) - Inputs.back()->setOwnsInputs(false); - } + for (unsigned i = 0, e = Archs.size(); i != e; ++i) + Inputs.push_back(C.MakeAction<BindArchAction>(Act, Archs[i])); // Lipo if necessary, we do it this way because we need to set the arch flag // so that -Xarch_ gets overwritten. if (Inputs.size() == 1 || Act->getType() == types::TY_Nothing) Actions.append(Inputs.begin(), Inputs.end()); else - Actions.push_back(new LipoJobAction(Inputs, Act->getType())); + Actions.push_back(C.MakeAction<LipoJobAction>(Inputs, Act->getType())); // Handle debug info queries. Arg *A = Args.getLastArg(options::OPT_g_Group); @@ -1077,15 +1073,16 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC, ActionList Inputs; Inputs.push_back(Actions.back()); Actions.pop_back(); - Actions.push_back(new DsymutilJobAction(Inputs, types::TY_dSYM)); + Actions.push_back( + C.MakeAction<DsymutilJobAction>(Inputs, types::TY_dSYM)); } // Verify the debug info output. if (Args.hasArg(options::OPT_verify_debug_info)) { - std::unique_ptr<Action> VerifyInput(Actions.back()); + Action* LastAction = Actions.back(); Actions.pop_back(); - Actions.push_back(new VerifyDebugInfoJobAction(std::move(VerifyInput), - types::TY_Nothing)); + Actions.push_back(C.MakeAction<VerifyDebugInfoJobAction>( + LastAction, types::TY_Nothing)); } } } @@ -1283,26 +1280,29 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, // Actions and \p Current is released. Otherwise the function creates // and returns a new CudaHostAction which wraps \p Current and device // side actions. -static std::unique_ptr<Action> -buildCudaActions(Compilation &C, DerivedArgList &Args, const Arg *InputArg, - std::unique_ptr<Action> HostAction, ActionList &Actions) { +static Action *buildCudaActions(Compilation &C, DerivedArgList &Args, + const Arg *InputArg, Action *HostAction, + ActionList &Actions) { Arg *PartialCompilationArg = Args.getLastArg(options::OPT_cuda_host_only, options::OPT_cuda_device_only); // Host-only compilation case. if (PartialCompilationArg && PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only)) - return std::unique_ptr<Action>( - new CudaHostAction(std::move(HostAction), {})); + return C.MakeAction<CudaHostAction>(HostAction, ActionList()); // Collect all cuda_gpu_arch parameters, removing duplicates. SmallVector<const char *, 4> GpuArchList; llvm::StringSet<> GpuArchNames; for (Arg *A : Args) { - if (A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) { - A->claim(); - if (GpuArchNames.insert(A->getValue()).second) - GpuArchList.push_back(A->getValue()); - } + if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) + continue; + A->claim(); + + const auto& Arch = A->getValue(); + if (!CudaDeviceAction::IsValidGpuArchName(Arch)) + C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << Arch; + else if (GpuArchNames.insert(Arch).second) + GpuArchList.push_back(Arch); } // Default to sm_20 which is the lowest common denominator for supported GPUs.
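The rewritten --cuda-gpu-arch loop folds together claiming, validation (with the err_drv_cuda_bad_gpu_arch diagnostic), and order-preserving deduplication via llvm::StringSet. A minimal sketch of just the dedup half, assuming StringSet::insert returns an (iterator, inserted) pair as the loop above relies on (uniqueArchs is illustrative):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringSet.h"

    // First-seen command-line order is preserved, which a bare set would
    // lose; that ordering matters when arch lists feed later decisions.
    static llvm::SmallVector<const char *, 4>
    uniqueArchs(llvm::ArrayRef<const char *> Archs) {
      llvm::StringSet<> Seen;
      llvm::SmallVector<const char *, 4> Out;
      for (const char *A : Archs)
        if (Seen.insert(A).second)
          Out.push_back(A);
      return Out;
    }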
@@ -1325,13 +1325,10 @@ buildCudaActions(Compilation &C, DerivedArgList &Args, const Arg *InputArg, "Failed to create actions for all devices"); // Check whether any of device actions stopped before they could generate PTX. - bool PartialCompilation = false; - for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { - if (CudaDeviceActions[I]->getKind() != Action::BackendJobClass) { - PartialCompilation = true; - break; - } - } + bool PartialCompilation = + llvm::any_of(CudaDeviceActions, [](const Action *a) { + return a->getKind() != Action::BackendJobClass; + }); // Figure out what to do with device actions -- pass them as inputs to the // host action or run each of them independently. @@ -1350,12 +1347,12 @@ buildCudaActions(Compilation &C, DerivedArgList &Args, const Arg *InputArg, } for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) - Actions.push_back(new CudaDeviceAction( - std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I], - /* AtTopLevel */ true)); + Actions.push_back(C.MakeAction<CudaDeviceAction>(CudaDeviceActions[I], + GpuArchList[I], + /* AtTopLevel */ true)); // Kill host action in case of device-only compilation. if (DeviceOnlyCompilation) - HostAction.reset(nullptr); + return nullptr; return HostAction; } @@ -1363,13 +1360,12 @@ buildCudaActions(Compilation &C, DerivedArgList &Args, const Arg *InputArg, // with AtTopLevel=false and become inputs for the host action. ActionList DeviceActions; for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) - DeviceActions.push_back(new CudaDeviceAction( - std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I], - /* AtTopLevel */ false)); + DeviceActions.push_back( + C.MakeAction<CudaDeviceAction>(CudaDeviceActions[I], GpuArchList[I], + /* AtTopLevel */ false)); // Return a new host action that incorporates original host action and all // device actions. - return std::unique_ptr<Action>( - new CudaHostAction(std::move(HostAction), DeviceActions)); + return C.MakeAction<CudaHostAction>(HostAction, DeviceActions); } void Driver::BuildActions(Compilation &C, const ToolChain &TC, @@ -1470,15 +1466,14 @@ void Driver::BuildActions(Compilation &C, const ToolChain &TC, continue; } - phases::ID CudaInjectionPhase = FinalPhase; - for (const auto &Phase : PL) - if (Phase <= FinalPhase && Phase == phases::Compile) { - CudaInjectionPhase = Phase; - break; - } + phases::ID CudaInjectionPhase = + (phases::Compile < FinalPhase && + llvm::find(PL, phases::Compile) != PL.end()) + ? phases::Compile + : FinalPhase; // Build the pipeline for this file. - std::unique_ptr<Action> Current(new InputAction(*InputArg, InputType)); + Action *Current = C.MakeAction<InputAction>(*InputArg, InputType); for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end(); i != e; ++i) { phases::ID Phase = *i; @@ -1490,7 +1485,8 @@ void Driver::BuildActions(Compilation &C, const ToolChain &TC, // Queue linker inputs. if (Phase == phases::Link) { assert((i + 1) == e && "linking must be final compilation step."); - LinkerInputs.push_back(Current.release()); + LinkerInputs.push_back(Current); + Current = nullptr; break; } @@ -1501,11 +1497,10 @@ void Driver::BuildActions(Compilation &C, const ToolChain &TC, continue; // Otherwise construct the appropriate action. 
- Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current)); + Current = ConstructPhaseAction(C, TC, Args, Phase, Current); if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase) { - Current = - buildCudaActions(C, Args, InputArg, std::move(Current), Actions); + Current = buildCudaActions(C, Args, InputArg, Current, Actions); if (!Current) break; } @@ -1516,12 +1511,13 @@ void Driver::BuildActions(Compilation &C, const ToolChain &TC, // If we ended with something, add to the output list. if (Current) - Actions.push_back(Current.release()); + Actions.push_back(Current); } // Add a link action if necessary. if (!LinkerInputs.empty()) - Actions.push_back(new LinkJobAction(LinkerInputs, types::TY_Image)); + Actions.push_back( + C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image)); // If we are linking, claim any options which are obviously only used for // compilation. @@ -1538,10 +1534,9 @@ void Driver::BuildActions(Compilation &C, const ToolChain &TC, Args.ClaimAllArgs(options::OPT_cuda_host_only); } -std::unique_ptr<Action> -Driver::ConstructPhaseAction(const ToolChain &TC, const ArgList &Args, - phases::ID Phase, - std::unique_ptr<Action> Input) const { +Action *Driver::ConstructPhaseAction(Compilation &C, const ToolChain &TC, + const ArgList &Args, phases::ID Phase, + Action *Input) const { llvm::PrettyStackTraceString CrashInfo("Constructing phase actions"); // Build the appropriate action. switch (Phase) { @@ -1561,7 +1556,7 @@ Driver::ConstructPhaseAction(const ToolChain &TC, const ArgList &Args, assert(OutputTy != types::TY_INVALID && "Cannot preprocess this input type!"); } - return llvm::make_unique<PreprocessJobAction>(std::move(Input), OutputTy); + return C.MakeAction<PreprocessJobAction>(Input, OutputTy); } case phases::Precompile: { types::ID OutputTy = types::TY_PCH; @@ -1569,53 +1564,43 @@ Driver::ConstructPhaseAction(const ToolChain &TC, const ArgList &Args, // Syntax checks should not emit a PCH file OutputTy = types::TY_Nothing; } - return llvm::make_unique<PrecompileJobAction>(std::move(Input), OutputTy); + return C.MakeAction<PrecompileJobAction>(Input, OutputTy); } case phases::Compile: { if (Args.hasArg(options::OPT_fsyntax_only)) - return llvm::make_unique<CompileJobAction>(std::move(Input), - types::TY_Nothing); + return C.MakeAction<CompileJobAction>(Input, types::TY_Nothing); if (Args.hasArg(options::OPT_rewrite_objc)) - return llvm::make_unique<CompileJobAction>(std::move(Input), - types::TY_RewrittenObjC); + return C.MakeAction<CompileJobAction>(Input, types::TY_RewrittenObjC); if (Args.hasArg(options::OPT_rewrite_legacy_objc)) - return llvm::make_unique<CompileJobAction>(std::move(Input), - types::TY_RewrittenLegacyObjC); + return C.MakeAction<CompileJobAction>(Input, + types::TY_RewrittenLegacyObjC); if (Args.hasArg(options::OPT__analyze, options::OPT__analyze_auto)) - return llvm::make_unique<AnalyzeJobAction>(std::move(Input), - types::TY_Plist); + return C.MakeAction<AnalyzeJobAction>(Input, types::TY_Plist); if (Args.hasArg(options::OPT__migrate)) - return llvm::make_unique<MigrateJobAction>(std::move(Input), - types::TY_Remap); + return C.MakeAction<MigrateJobAction>(Input, types::TY_Remap); if (Args.hasArg(options::OPT_emit_ast)) - return llvm::make_unique<CompileJobAction>(std::move(Input), - types::TY_AST); + return C.MakeAction<CompileJobAction>(Input, types::TY_AST); if (Args.hasArg(options::OPT_module_file_info)) - return llvm::make_unique<CompileJobAction>(std::move(Input), - types::TY_ModuleFile); + return 
C.MakeAction<CompileJobAction>(Input, types::TY_ModuleFile); if (Args.hasArg(options::OPT_verify_pch)) - return llvm::make_unique<VerifyPCHJobAction>(std::move(Input), - types::TY_Nothing); - return llvm::make_unique<CompileJobAction>(std::move(Input), - types::TY_LLVM_BC); + return C.MakeAction<VerifyPCHJobAction>(Input, types::TY_Nothing); + return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC); } case phases::Backend: { if (isUsingLTO()) { types::ID Output = Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC; - return llvm::make_unique<BackendJobAction>(std::move(Input), Output); + return C.MakeAction<BackendJobAction>(Input, Output); } if (Args.hasArg(options::OPT_emit_llvm)) { types::ID Output = Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC; - return llvm::make_unique<BackendJobAction>(std::move(Input), Output); + return C.MakeAction<BackendJobAction>(Input, Output); } - return llvm::make_unique<BackendJobAction>(std::move(Input), - types::TY_PP_Asm); + return C.MakeAction<BackendJobAction>(Input, types::TY_PP_Asm); } case phases::Assemble: - return llvm::make_unique<AssembleJobAction>(std::move(Input), - types::TY_Object); + return C.MakeAction<AssembleJobAction>(Input, types::TY_Object); } llvm_unreachable("invalid phase in ConstructPhaseAction"); @@ -1662,12 +1647,11 @@ void Driver::BuildJobs(Compilation &C) const { LinkingOutput = getDefaultImageName(); } - InputInfo II; BuildJobsForAction(C, A, &C.getDefaultToolChain(), /*BoundArch*/ nullptr, /*AtTopLevel*/ true, /*MultipleArchs*/ ArchNames.size() > 1, - /*LinkingOutput*/ LinkingOutput, II); + /*LinkingOutput*/ LinkingOutput); } // If the user passed -Qunused-arguments or there were errors, don't warn @@ -1795,21 +1779,19 @@ static const Tool *selectToolForJob(Compilation &C, bool SaveTemps, return ToolForJob; } -void Driver::BuildJobsForAction(Compilation &C, const Action *A, - const ToolChain *TC, const char *BoundArch, - bool AtTopLevel, bool MultipleArchs, - const char *LinkingOutput, - InputInfo &Result) const { +InputInfo Driver::BuildJobsForAction(Compilation &C, const Action *A, + const ToolChain *TC, const char *BoundArch, + bool AtTopLevel, bool MultipleArchs, + const char *LinkingOutput) const { llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); InputInfoList CudaDeviceInputInfos; if (const CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) { - InputInfo II; // Append outputs of device jobs to the input list. for (const Action *DA : CHA->getDeviceActions()) { - BuildJobsForAction(C, DA, TC, nullptr, AtTopLevel, - /*MultipleArchs*/ false, LinkingOutput, II); - CudaDeviceInputInfos.push_back(II); + CudaDeviceInputInfos.push_back( + BuildJobsForAction(C, DA, TC, nullptr, AtTopLevel, + /*MultipleArchs*/ false, LinkingOutput)); } // Override current action with a real host compile action and continue // processing it. 
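BuildJobsForAction now returns its InputInfo instead of threading an InputInfo &Result out-parameter through every recursive call, which is what lets the CUDA paths above collapse into single expressions. A reduced sketch of the new shape, with hypothetical stand-in types:

    #include <string>
    #include <vector>

    struct InputInfo {
      std::string File;
    };

    struct Action {
      std::string Name;
      std::vector<const Action *> Inputs;
    };

    // Hypothetical stand-in for constructing the job at one node.
    static InputInfo runTool(const Action &A, const std::vector<InputInfo> &) {
      return InputInfo{A.Name + ".o"};
    }

    // Same shape as the refactored BuildJobsForAction: each recursive call
    // yields its result directly, so callers need no scratch II variables.
    static InputInfo build(const Action &A) {
      std::vector<InputInfo> Ins;
      for (const Action *In : A.Inputs)
        Ins.push_back(build(*In));
      return runTool(A, Ins);
    }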
@@ -1823,11 +1805,9 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, Input.claim(); if (Input.getOption().matches(options::OPT_INPUT)) { const char *Name = Input.getValue(); - Result = InputInfo(Name, A->getType(), Name); - } else { - Result = InputInfo(&Input, A->getType(), ""); + return InputInfo(A, Name, /* BaseInput = */ Name); } - return; + return InputInfo(A, &Input, /* BaseInput = */ ""); } if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) { @@ -1841,19 +1821,17 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, else TC = &C.getDefaultToolChain(); - BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel, - MultipleArchs, LinkingOutput, Result); - return; + return BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel, + MultipleArchs, LinkingOutput); } if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) { // Initial processing of CudaDeviceAction carries host params. // Call BuildJobsForAction() again, now with correct device parameters. assert(CDA->getGpuArchName() && "No GPU name in device action."); - BuildJobsForAction(C, *CDA->begin(), C.getCudaDeviceToolChain(), - CDA->getGpuArchName(), CDA->isAtTopLevel(), - /*MultipleArchs*/ true, LinkingOutput, Result); - return; + return BuildJobsForAction(C, *CDA->begin(), C.getCudaDeviceToolChain(), + CDA->getGpuArchName(), CDA->isAtTopLevel(), + /*MultipleArchs*/ true, LinkingOutput); } const ActionList *Inputs = &A->getInputs(); @@ -1863,16 +1841,15 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, const Tool *T = selectToolForJob(C, isSaveTempsEnabled(), TC, JA, Inputs, CollapsedCHA); if (!T) - return; + return InputInfo(); // If we've collapsed action list that contained CudaHostAction we // need to build jobs for device-side inputs it may have held. if (CollapsedCHA) { - InputInfo II; for (const Action *DA : CollapsedCHA->getDeviceActions()) { - BuildJobsForAction(C, DA, TC, "", AtTopLevel, - /*MultipleArchs*/ false, LinkingOutput, II); - CudaDeviceInputInfos.push_back(II); + CudaDeviceInputInfos.push_back( + BuildJobsForAction(C, DA, TC, "", AtTopLevel, + /*MultipleArchs*/ false, LinkingOutput)); } } @@ -1882,14 +1859,11 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, // Treat dsymutil and verify sub-jobs as being at the top-level too, they // shouldn't get temporary output names. // FIXME: Clean this up. - bool SubJobAtTopLevel = false; - if (AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A))) - SubJobAtTopLevel = true; - - InputInfo II; - BuildJobsForAction(C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, - LinkingOutput, II); - InputInfos.push_back(II); + bool SubJobAtTopLevel = + AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A)); + InputInfos.push_back(BuildJobsForAction(C, Input, TC, BoundArch, + SubJobAtTopLevel, MultipleArchs, + LinkingOutput)); } // Always use the first input as the base input. @@ -1905,12 +1879,13 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, InputInfos.append(CudaDeviceInputInfos.begin(), CudaDeviceInputInfos.end()); // Determine the place to write output to, if any. 
+ InputInfo Result; if (JA->getType() == types::TY_Nothing) - Result = InputInfo(A->getType(), BaseInput); + Result = InputInfo(A, BaseInput); else - Result = InputInfo(GetNamedOutputPath(C, *JA, BaseInput, BoundArch, - AtTopLevel, MultipleArchs), - A->getType(), BaseInput); + Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch, + AtTopLevel, MultipleArchs), + BaseInput); if (CCCPrintBindings && !CCGenDiagnostics) { llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"' @@ -1925,6 +1900,7 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, T->ConstructJob(C, *JA, Result, InputInfos, C.getArgsForToolChain(TC, BoundArch), LinkingOutput); } + return Result; } const char *Driver::getDefaultImageName() const { diff --git a/contrib/llvm/tools/clang/lib/Driver/InputInfo.h b/contrib/llvm/tools/clang/lib/Driver/InputInfo.h index b23ba57..0c36e81 100644 --- a/contrib/llvm/tools/clang/lib/Driver/InputInfo.h +++ b/contrib/llvm/tools/clang/lib/Driver/InputInfo.h @@ -10,6 +10,7 @@ #ifndef LLVM_CLANG_LIB_DRIVER_INPUTINFO_H #define LLVM_CLANG_LIB_DRIVER_INPUTINFO_H +#include "clang/Driver/Action.h" #include "clang/Driver/Types.h" #include "llvm/Option/Arg.h" #include <cassert> @@ -38,21 +39,36 @@ class InputInfo { const llvm::opt::Arg *InputArg; } Data; Class Kind; + const Action* Act; types::ID Type; const char *BaseInput; + static types::ID GetActionType(const Action *A) { + return A != nullptr ? A->getType() : types::TY_Nothing; + } + public: - InputInfo() {} - InputInfo(types::ID _Type, const char *_BaseInput) - : Kind(Nothing), Type(_Type), BaseInput(_BaseInput) { + InputInfo() : InputInfo(nullptr, nullptr) {} + InputInfo(const Action *A, const char *_BaseInput) + : Kind(Nothing), Act(A), Type(GetActionType(A)), BaseInput(_BaseInput) {} + + InputInfo(types::ID _Type, const char *_Filename, const char *_BaseInput) + : Kind(Filename), Act(nullptr), Type(_Type), BaseInput(_BaseInput) { + Data.Filename = _Filename; } - InputInfo(const char *_Filename, types::ID _Type, const char *_BaseInput) - : Kind(Filename), Type(_Type), BaseInput(_BaseInput) { + InputInfo(const Action *A, const char *_Filename, const char *_BaseInput) + : Kind(Filename), Act(A), Type(GetActionType(A)), BaseInput(_BaseInput) { Data.Filename = _Filename; } - InputInfo(const llvm::opt::Arg *_InputArg, types::ID _Type, + + InputInfo(types::ID _Type, const llvm::opt::Arg *_InputArg, + const char *_BaseInput) + : Kind(InputArg), Act(nullptr), Type(_Type), BaseInput(_BaseInput) { + Data.InputArg = _InputArg; + } + InputInfo(const Action *A, const llvm::opt::Arg *_InputArg, const char *_BaseInput) - : Kind(InputArg), Type(_Type), BaseInput(_BaseInput) { + : Kind(InputArg), Act(A), Type(GetActionType(A)), BaseInput(_BaseInput) { Data.InputArg = _InputArg; } @@ -61,6 +77,9 @@ public: bool isInputArg() const { return Kind == InputArg; } types::ID getType() const { return Type; } const char *getBaseInput() const { return BaseInput; } + /// The action for which this InputInfo was created. May be null. 
+ const Action *getAction() const { return Act; } + void setAction(const Action *A) { Act = A; } const char *getFilename() const { assert(isFilename() && "Invalid accessor."); diff --git a/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp b/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp index b7e576e..6874715 100644 --- a/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp @@ -634,6 +634,96 @@ SanitizerMask MSVCToolChain::getSupportedSanitizers() const { return Res; } +static void TranslateOptArg(Arg *A, llvm::opt::DerivedArgList &DAL, + bool SupportsForcingFramePointer, + const char *ExpandChar, const OptTable &Opts) { + assert(A->getOption().matches(options::OPT__SLASH_O)); + + StringRef OptStr = A->getValue(); + for (size_t I = 0, E = OptStr.size(); I != E; ++I) { + const char &OptChar = *(OptStr.data() + I); + switch (OptChar) { + default: + break; + case '1': + case '2': + case 'x': + case 'd': + if (&OptChar == ExpandChar) { + if (OptChar == 'd') { + DAL.AddFlagArg(A, Opts.getOption(options::OPT_O0)); + } else { + if (OptChar == '1') { + DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "s"); + } else if (OptChar == '2' || OptChar == 'x') { + DAL.AddFlagArg(A, Opts.getOption(options::OPT_fbuiltin)); + DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "2"); + } + if (SupportsForcingFramePointer) + DAL.AddFlagArg(A, + Opts.getOption(options::OPT_fomit_frame_pointer)); + if (OptChar == '1' || OptChar == '2') + DAL.AddFlagArg(A, + Opts.getOption(options::OPT_ffunction_sections)); + } + } + break; + case 'b': + if (I + 1 != E && isdigit(OptStr[I + 1])) + ++I; + break; + case 'g': + break; + case 'i': + if (I + 1 != E && OptStr[I + 1] == '-') { + ++I; + DAL.AddFlagArg(A, Opts.getOption(options::OPT_fno_builtin)); + } else { + DAL.AddFlagArg(A, Opts.getOption(options::OPT_fbuiltin)); + } + break; + case 's': + DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "s"); + break; + case 't': + DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "2"); + break; + case 'y': { + bool OmitFramePointer = true; + if (I + 1 != E && OptStr[I + 1] == '-') { + OmitFramePointer = false; + ++I; + } + if (SupportsForcingFramePointer) { + if (OmitFramePointer) + DAL.AddFlagArg(A, + Opts.getOption(options::OPT_fomit_frame_pointer)); + else + DAL.AddFlagArg( + A, Opts.getOption(options::OPT_fno_omit_frame_pointer)); + } + break; + } + } + } +} + +static void TranslateDArg(Arg *A, llvm::opt::DerivedArgList &DAL, + const OptTable &Opts) { + assert(A->getOption().matches(options::OPT_D)); + + StringRef Val = A->getValue(); + size_t Hash = Val.find('#'); + if (Hash == StringRef::npos || Hash > Val.find('=')) { + DAL.append(A); + return; + } + + std::string NewVal = Val; + NewVal[Hash] = '='; + DAL.AddJoinedArg(A, Opts.getOption(options::OPT_D), NewVal); +} + llvm::opt::DerivedArgList * MSVCToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *BoundArch) const { @@ -664,81 +754,18 @@ MSVCToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, } } - // The -O flag actually takes an amalgam of other options. For example, - // '/Ogyb2' is equivalent to '/Og' '/Oy' '/Ob2'. for (Arg *A : Args) { - if (!A->getOption().matches(options::OPT__SLASH_O)) { + if (A->getOption().matches(options::OPT__SLASH_O)) { + // The -O flag actually takes an amalgam of other options. For example, + // '/Ogyb2' is equivalent to '/Og' '/Oy' '/Ob2'. 
+ TranslateOptArg(A, *DAL, SupportsForcingFramePointer, ExpandChar, Opts); + } else if (A->getOption().matches(options::OPT_D)) { + // Translate -Dfoo#bar into -Dfoo=bar. + TranslateDArg(A, *DAL, Opts); + } else { DAL->append(A); - continue; - } - - StringRef OptStr = A->getValue(); - for (size_t I = 0, E = OptStr.size(); I != E; ++I) { - const char &OptChar = *(OptStr.data() + I); - switch (OptChar) { - default: - break; - case '1': - case '2': - case 'x': - case 'd': - if (&OptChar == ExpandChar) { - if (OptChar == 'd') { - DAL->AddFlagArg(A, Opts.getOption(options::OPT_O0)); - } else { - if (OptChar == '1') { - DAL->AddJoinedArg(A, Opts.getOption(options::OPT_O), "s"); - } else if (OptChar == '2' || OptChar == 'x') { - DAL->AddFlagArg(A, Opts.getOption(options::OPT_fbuiltin)); - DAL->AddJoinedArg(A, Opts.getOption(options::OPT_O), "2"); - } - if (SupportsForcingFramePointer) - DAL->AddFlagArg(A, - Opts.getOption(options::OPT_fomit_frame_pointer)); - if (OptChar == '1' || OptChar == '2') - DAL->AddFlagArg(A, - Opts.getOption(options::OPT_ffunction_sections)); - } - } - break; - case 'b': - if (I + 1 != E && isdigit(OptStr[I + 1])) - ++I; - break; - case 'g': - break; - case 'i': - if (I + 1 != E && OptStr[I + 1] == '-') { - ++I; - DAL->AddFlagArg(A, Opts.getOption(options::OPT_fno_builtin)); - } else { - DAL->AddFlagArg(A, Opts.getOption(options::OPT_fbuiltin)); - } - break; - case 's': - DAL->AddJoinedArg(A, Opts.getOption(options::OPT_O), "s"); - break; - case 't': - DAL->AddJoinedArg(A, Opts.getOption(options::OPT_O), "2"); - break; - case 'y': { - bool OmitFramePointer = true; - if (I + 1 != E && OptStr[I + 1] == '-') { - OmitFramePointer = false; - ++I; - } - if (SupportsForcingFramePointer) { - if (OmitFramePointer) - DAL->AddFlagArg(A, - Opts.getOption(options::OPT_fomit_frame_pointer)); - else - DAL->AddFlagArg( - A, Opts.getOption(options::OPT_fno_omit_frame_pointer)); - } - break; - } - } } } + return DAL; } diff --git a/contrib/llvm/tools/clang/lib/Driver/MinGWToolChain.cpp b/contrib/llvm/tools/clang/lib/Driver/MinGWToolChain.cpp index c5287bb..938440b 100644 --- a/contrib/llvm/tools/clang/lib/Driver/MinGWToolChain.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/MinGWToolChain.cpp @@ -66,17 +66,23 @@ MinGW::MinGW(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) : ToolChain(D, Triple, Args) { getProgramPaths().push_back(getDriver().getInstalledDir()); - // On Windows if there is no sysroot we search for gcc on the PATH. - if (getDriver().SysRoot.size()) - Base = getDriver().SysRoot; +// On Windows there are no standard install locations, so we search +// for gcc on the PATH. On Linux the base is always /usr.
#ifdef LLVM_ON_WIN32 + if (getDriver().SysRoot.size()) + Base = getDriver().SysRoot; else if (llvm::ErrorOr<std::string> GPPName = llvm::sys::findProgramByName("gcc")) Base = llvm::sys::path::parent_path( llvm::sys::path::parent_path(GPPName.get())); -#endif - if (!Base.size()) + else Base = llvm::sys::path::parent_path(getDriver().getInstalledDir()); +#else + if (getDriver().SysRoot.size()) + Base = getDriver().SysRoot; + else + Base = "/usr"; +#endif Base += llvm::sys::path::get_separator(); findGccLibDir(); diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp index 7ece321..beede2e 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp @@ -526,7 +526,7 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const { // no environment variable defined, see if we can set the default based // on -isysroot. if (OSXTarget.empty() && iOSTarget.empty() && WatchOSTarget.empty() && - Args.hasArg(options::OPT_isysroot)) { + TvOSTarget.empty() && Args.hasArg(options::OPT_isysroot)) { if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) { StringRef isysroot = A->getValue(); // Assume SDK has path: SOME_PATH/SDKs/PlatformXX.YY.sdk @@ -2716,13 +2716,8 @@ const StringRef HexagonToolChain::GetDefaultCPU() { const StringRef HexagonToolChain::GetTargetCPUVersion(const ArgList &Args) { Arg *CpuArg = nullptr; - - for (auto &A : Args) { - if (A->getOption().matches(options::OPT_mcpu_EQ)) { - CpuArg = A; - A->claim(); - } - } + if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ, options::OPT_march_EQ)) + CpuArg = A; StringRef CPU = CpuArg ? CpuArg->getValue() : GetDefaultCPU(); if (CPU.startswith("hexagon")) diff --git a/contrib/llvm/tools/clang/lib/Driver/Tools.cpp b/contrib/llvm/tools/clang/lib/Driver/Tools.cpp index 8468105..5a2dbd3 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Tools.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Tools.cpp @@ -2069,6 +2069,16 @@ void Clang::AddHexagonTargetArgs(const ArgList &Args, CmdArgs.push_back("-machine-sink-split=0"); } +void Clang::AddWebAssemblyTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + // Default to "hidden" visibility. + if (!Args.hasArg(options::OPT_fvisibility_EQ, + options::OPT_fvisibility_ms_compat)) { + CmdArgs.push_back("-fvisibility"); + CmdArgs.push_back("hidden"); + } +} + // Decode AArch64 features from string like +[no]featureA+[no]featureB+... static bool DecodeAArch64Features(const Driver &D, StringRef text, std::vector<const char *> &Features) { @@ -2970,7 +2980,7 @@ static void SplitDebugInfo(const ToolChain &TC, Compilation &C, const Tool &T, ExtractArgs.push_back(OutFile); const char *Exec = Args.MakeArgString(TC.GetProgramPath("objcopy")); - InputInfo II(Output.getFilename(), types::TY_Object, Output.getFilename()); + InputInfo II(types::TY_Object, Output.getFilename(), Output.getFilename()); // First extract the dwo sections. C.addCommand(llvm::make_unique<Command>(JA, T, Exec, ExtractArgs, II)); @@ -3253,8 +3263,9 @@ ParsePICArgs(const ToolChain &ToolChain, const llvm::Triple &Triple, // ToolChain.getTriple() and Triple? bool PIE = ToolChain.isPIEDefault(); bool PIC = PIE || ToolChain.isPICDefault(); - // The Darwin default to use PIC does not apply when using -static. - if (ToolChain.getTriple().isOSDarwin() && Args.hasArg(options::OPT_static)) + // The Darwin/MachO default to use PIC does not apply when using -static. 
+ if (ToolChain.getTriple().isOSBinFormatMachO() && + Args.hasArg(options::OPT_static)) PIE = PIC = false; bool IsPICLevelTwo = PIC; @@ -4015,6 +4026,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, case llvm::Triple::hexagon: AddHexagonTargetArgs(Args, CmdArgs); break; + + case llvm::Triple::wasm32: + case llvm::Triple::wasm64: + AddWebAssemblyTargetArgs(Args, CmdArgs); + break; } // The 'g' groups options involve a somewhat intricate sequence of decisions @@ -4176,8 +4192,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-generate-type-units"); } - // CloudABI uses -ffunction-sections and -fdata-sections by default. - bool UseSeparateSections = Triple.getOS() == llvm::Triple::CloudABI; + // CloudABI and WebAssembly use -ffunction-sections and -fdata-sections by + // default. + bool UseSeparateSections = Triple.getOS() == llvm::Triple::CloudABI || + Triple.getArch() == llvm::Triple::wasm32 || + Triple.getArch() == llvm::Triple::wasm64; if (Args.hasFlag(options::OPT_ffunction_sections, options::OPT_fno_function_sections, UseSeparateSections)) { @@ -6040,8 +6059,7 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, // doesn't handle that so rather than warning about unused flags that are // actually used, we'll lie by omission instead. // FIXME: Stop lying and consume only the appropriate driver flags - for (const Arg *A : Args.filtered(options::OPT_W_Group)) - A->claim(); + Args.ClaimAllArgs(options::OPT_W_Group); CollectArgsForIntegratedAssembler(C, Args, CmdArgs, getToolChain().getDriver()); @@ -6078,6 +6096,12 @@ void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA, for (const auto &A : Args) { if (forwardToGCC(A->getOption())) { + // It is unfortunate that we have to claim here, as this means + // we will basically never report anything interesting for + // platforms using a generic gcc, even if we are just using gcc + // to get to the assembler. + A->claim(); + // Don't forward any -g arguments to assembly steps. if (isa<AssembleJobAction>(JA) && A->getOption().matches(options::OPT_g_Group)) @@ -6088,11 +6112,6 @@ void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA, A->getOption().matches(options::OPT_W_Group)) continue; - // It is unfortunate that we have to claim here, as this means - // we will basically never report anything interesting for - // platforms using a generic gcc, even if we are just using gcc - // to get to the assembler. - A->claim(); A->render(Args, CmdArgs); } } @@ -6502,10 +6521,6 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, std::string Linker = getToolChain().GetProgramPath(getShortName()); ArgStringList CmdArgs; - CmdArgs.push_back("-flavor"); - CmdArgs.push_back("old-gnu"); - CmdArgs.push_back("-target"); - CmdArgs.push_back(Args.MakeArgString(getToolChain().getTripleString())); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); @@ -6534,6 +6549,14 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, ArgStringList CmdArgs; CmdArgs.push_back("-flavor"); CmdArgs.push_back("ld"); + + // Enable garbage collection of unused input sections by default, since code + // size is of particular importance. This is significantly facilitated by + // the enabling of -ffunction-sections and -fdata-sections in + // Clang::ConstructJob. 
+ if (areOptimizationsEnabled(Args)) + CmdArgs.push_back("--gc-sections"); + AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); @@ -8965,7 +8988,7 @@ void nacltools::AssemblerARM::ConstructJob(Compilation &C, const JobAction &JA, const char *LinkingOutput) const { const toolchains::NaClToolChain &ToolChain = static_cast<const toolchains::NaClToolChain &>(getToolChain()); - InputInfo NaClMacros(ToolChain.GetNaClArmMacrosPath(), types::TY_PP_Asm, + InputInfo NaClMacros(types::TY_PP_Asm, ToolChain.GetNaClArmMacrosPath(), "nacl-arm-macros.s"); InputInfoList NewInputs; NewInputs.push_back(NaClMacros); diff --git a/contrib/llvm/tools/clang/lib/Driver/Tools.h b/contrib/llvm/tools/clang/lib/Driver/Tools.h index 168662f..2b137f4 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Tools.h +++ b/contrib/llvm/tools/clang/lib/Driver/Tools.h @@ -82,6 +82,8 @@ private: llvm::opt::ArgStringList &CmdArgs) const; void AddHexagonTargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + void AddWebAssemblyTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; enum RewriteKind { RK_None, RK_Fragile, RK_NonFragile }; @@ -238,7 +240,7 @@ namespace amdgpu { class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool { public: - Linker(const ToolChain &TC) : GnuTool("amdgpu::Linker", "lld", TC) {} + Linker(const ToolChain &TC) : GnuTool("amdgpu::Linker", "ld.lld", TC) {} bool isLinkJob() const override { return true; } bool hasIntegratedCPP() const override { return false; } void ConstructJob(Compilation &C, const JobAction &JA, diff --git a/contrib/llvm/tools/clang/lib/Edit/RewriteObjCFoundationAPI.cpp b/contrib/llvm/tools/clang/lib/Edit/RewriteObjCFoundationAPI.cpp index 9f71168..482c0f6 100644 --- a/contrib/llvm/tools/clang/lib/Edit/RewriteObjCFoundationAPI.cpp +++ b/contrib/llvm/tools/clang/lib/Edit/RewriteObjCFoundationAPI.cpp @@ -1077,6 +1077,9 @@ static bool rewriteToNumericBoxedExpression(const ObjCMessageExpr *Msg, case CK_BuiltinFnToFnPtr: case CK_ZeroToOCLEvent: return false; + + case CK_BooleanToSignedIntegral: + llvm_unreachable("OpenCL-specific cast in Objective-C?"); } } diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp index 8faab28..1118335 100644 --- a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp +++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp @@ -150,7 +150,12 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection) return true; if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) || - (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName)) || + (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) && + // FIXME: This is a temporary workaround for the case where clang-format + // sets BreakBeforeParameter to avoid bin packing and this creates a + // completely unnecessary line break after a template type that isn't + // line-wrapped. 
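The FIXME above concerns how mustBreak() interacts with BinPackParameters; decisions like this can be reproduced outside the test suite through the public entry point. A minimal driver, assuming the clang-format API as of this import (error handling elided):

    #include "clang/Format/Format.h"
    #include "clang/Tooling/Core/Replacement.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      clang::format::FormatStyle Style = clang::format::getLLVMStyle();
      Style.BinPackParameters = false; // one of the knobs mustBreak() consults
      llvm::StringRef Code =
          "template <typename LongName> LongName f(LongName a, LongName b);\n";
      clang::tooling::Replacements Rs = clang::format::reformat(
          Style, Code, {clang::tooling::Range(0, Code.size())});
      llvm::outs() << clang::tooling::applyAllReplacements(Code, Rs);
    }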
+ (Previous.NestingLevel == 1 || Style.BinPackParameters)) || (Style.BreakBeforeTernaryOperators && Current.is(TT_ConditionalExpr) && Previous.isNot(tok::question)) || (!Style.BreakBeforeTernaryOperators && @@ -177,13 +182,15 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { return true; unsigned NewLineColumn = getNewLineColumn(State); - if (State.Column <= NewLineColumn) - return false; - if (Current.isMemberAccess() && - State.Column + getLengthToNextOperator(Current) > Style.ColumnLimit) + State.Column + getLengthToNextOperator(Current) > Style.ColumnLimit && + (State.Column > NewLineColumn || + Current.NestingLevel < State.StartOfLineLevel)) return true; + if (State.Column <= NewLineColumn) + return false; + if (Style.AlwaysBreakBeforeMultilineStrings && (NewLineColumn == State.FirstIndent + Style.ContinuationIndentWidth || Previous.is(tok::comma) || Current.NestingLevel < 2) && @@ -383,7 +390,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, State.Stack.back().LastSpace = State.Column; State.Stack.back().NestedBlockIndent = State.Column; } else if (!Current.isOneOf(tok::comment, tok::caret) && - (Previous.is(tok::comma) || + ((Previous.is(tok::comma) && + !Previous.is(TT_OverloadedOperator)) || (Previous.is(tok::colon) && Previous.is(TT_ObjCMethodExpr)))) { State.Stack.back().LastSpace = State.Column; } else if ((Previous.isOneOf(TT_BinaryOperator, TT_ConditionalExpr, @@ -860,7 +868,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, (!SkipFirstExtraIndent && *I > prec::Assignment && !Current.isTrailingComment())) NewParenState.Indent += Style.ContinuationIndentWidth; - if ((Previous && !Previous->opensScope()) || *I > prec::Comma) + if ((Previous && !Previous->opensScope()) || *I != prec::Comma) NewParenState.BreakBeforeParameter = false; State.Stack.push_back(NewParenState); SkipFirstExtraIndent = false; @@ -906,8 +914,12 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, NewIndent = State.Stack.back().LastSpace + Style.ContinuationIndentWidth; } const FormatToken *NextNoComment = Current.getNextNonComment(); + bool EndsInComma = Current.MatchingParen && + Current.MatchingParen->Previous && + Current.MatchingParen->Previous->is(tok::comma); AvoidBinPacking = - Current.isOneOf(TT_ArrayInitializerLSquare, TT_DictLiteral) || + (Current.is(TT_ArrayInitializerLSquare) && EndsInComma) || + Current.is(TT_DictLiteral) || Style.Language == FormatStyle::LK_Proto || !Style.BinPackArguments || (NextNoComment && NextNoComment->is(TT_DesignatedInitializerPeriod)); if (Current.ParameterCount > 1) diff --git a/contrib/llvm/tools/clang/lib/Format/Format.cpp b/contrib/llvm/tools/clang/lib/Format/Format.cpp index 5068fca..2689368 100644 --- a/contrib/llvm/tools/clang/lib/Format/Format.cpp +++ b/contrib/llvm/tools/clang/lib/Format/Format.cpp @@ -583,6 +583,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; GoogleStyle.BreakBeforeTernaryOperators = false; + GoogleStyle.CommentPragmas = "@(export|visibility) {"; GoogleStyle.MaxEmptyLinesToKeep = 3; GoogleStyle.SpacesInContainerLiterals = false; } else if (Language == FormatStyle::LK_Proto) { @@ -1238,6 +1239,8 @@ private: FormatTok->Type = TT_ImplicitStringLiteral; break; } + if (FormatTok->Type == TT_ImplicitStringLiteral) + break; } if (FormatTok->is(TT_ImplicitStringLiteral)) @@ -1901,8 +1904,9 @@ 
tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), new DiagnosticOptions); SourceManager SourceMgr(Diagnostics, Files); - InMemoryFileSystem->addFile(FileName, 0, - llvm::MemoryBuffer::getMemBuffer(Code, FileName)); + InMemoryFileSystem->addFile( + FileName, 0, llvm::MemoryBuffer::getMemBuffer( + Code, FileName, /*RequiresNullTerminator=*/false)); FileID ID = SourceMgr.createFileID(Files.getFile(FileName), SourceLocation(), clang::SrcMgr::C_User); SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp index caff131..8fbb43b 100644 --- a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp +++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp @@ -119,7 +119,9 @@ private: } } - if (Left->Previous && + if (Left->is(TT_OverloadedOperatorLParen)) { + Contexts.back().IsExpression = false; + } else if (Left->Previous && (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype, tok::kw_if, tok::kw_while, tok::l_paren, tok::comma) || @@ -132,9 +134,7 @@ private: // This is a parameter list of a lambda expression. Contexts.back().IsExpression = false; } else if (Line.InPPDirective && - (!Left->Previous || - !Left->Previous->isOneOf(tok::identifier, - TT_OverloadedOperator))) { + (!Left->Previous || !Left->Previous->is(tok::identifier))) { Contexts.back().IsExpression = true; } else if (Contexts[Contexts.size() - 2].CaretFound) { // This is the parameter list of an ObjC block. @@ -199,6 +199,18 @@ private: Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; + if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) && + Left->Previous && Left->Previous->is(tok::l_paren)) { + // Detect the case where macros are used to generate lambdas or + // function bodies, e.g.: + // auto my_lambda = MACRO((Type *type, int i) { .. body ..
}); + for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) { + if (Tok->is(TT_BinaryOperator) && + Tok->isOneOf(tok::star, tok::amp, tok::ampamp)) + Tok->Type = TT_PointerOrReference; + } + } + if (StartsObjCMethodExpr) { CurrentToken->Type = TT_ObjCMethodExpr; if (Contexts.back().FirstObjCSelectorName) { @@ -568,7 +580,8 @@ private: if (CurrentToken->isOneOf(tok::star, tok::amp)) CurrentToken->Type = TT_PointerOrReference; consumeToken(); - if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator)) + if (CurrentToken && + CurrentToken->Previous->isOneOf(TT_BinaryOperator, tok::comma)) CurrentToken->Previous->Type = TT_OverloadedOperator; } if (CurrentToken) { @@ -1713,7 +1726,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma)) return 100; if (Left.is(TT_JsTypeColon)) - return 100; + return 35; } if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next && @@ -2058,14 +2071,14 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) || Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) return true; + if (Right.is(TT_OverloadedOperatorLParen)) + return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; if (Left.is(tok::comma)) return true; if (Right.is(tok::comma)) return false; if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen)) return true; - if (Right.is(TT_OverloadedOperatorLParen)) - return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; if (Right.is(tok::colon)) { if (Line.First->isOneOf(tok::kw_case, tok::kw_default) || !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi)) diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp index 94b8498..7b8f6e6 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp @@ -315,6 +315,7 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { // definitions, too. unsigned StoredPosition = Tokens->getPosition(); FormatToken *Tok = FormatTok; + const FormatToken *PrevTok = getPreviousToken(); // Keep a stack of positions of lbrace tokens. We will // update information about whether an lbrace starts a // braced init list or a different block during the loop. @@ -331,47 +332,53 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { switch (Tok->Tok.getKind()) { case tok::l_brace: - Tok->BlockKind = BK_Unknown; + if (Style.Language == FormatStyle::LK_JavaScript && PrevTok && + PrevTok->is(tok::colon)) + // In TypeScript's TypeMemberLists, there can be semicolons between the + // individual members. + Tok->BlockKind = BK_BracedInit; + else + Tok->BlockKind = BK_Unknown; LBraceStack.push_back(Tok); break; case tok::r_brace: - if (!LBraceStack.empty()) { - if (LBraceStack.back()->BlockKind == BK_Unknown) { - bool ProbablyBracedList = false; - if (Style.Language == FormatStyle::LK_Proto) { - ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); - } else { - // Using OriginalColumn to distinguish between ObjC methods and - // binary operators is a bit hacky. - bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && - NextTok->OriginalColumn == 0; - - // If there is a comma, semicolon or right paren after the closing - // brace, we assume this is a braced initializer list. 
Note that - // regardless how we mark inner braces here, we will overwrite the - // BlockKind later if we parse a braced list (where all blocks - // inside are by default braced lists), or when we explicitly detect - // blocks (for example while parsing lambdas). - // - // We exclude + and - as they can be ObjC visibility modifiers. - ProbablyBracedList = - NextTok->isOneOf(tok::comma, tok::period, tok::colon, - tok::r_paren, tok::r_square, tok::l_brace, - tok::l_square, tok::l_paren, tok::ellipsis) || - (NextTok->is(tok::semi) && - (!ExpectClassBody || LBraceStack.size() != 1)) || - (NextTok->isBinaryOperator() && !NextIsObjCMethod); - } - if (ProbablyBracedList) { - Tok->BlockKind = BK_BracedInit; - LBraceStack.back()->BlockKind = BK_BracedInit; - } else { - Tok->BlockKind = BK_Block; - LBraceStack.back()->BlockKind = BK_Block; - } + if (LBraceStack.empty()) + break; + if (LBraceStack.back()->BlockKind == BK_Unknown) { + bool ProbablyBracedList = false; + if (Style.Language == FormatStyle::LK_Proto) { + ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); + } else { + // Using OriginalColumn to distinguish between ObjC methods and + // binary operators is a bit hacky. + bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && + NextTok->OriginalColumn == 0; + + // If there is a comma, semicolon or right paren after the closing + // brace, we assume this is a braced initializer list. Note that + // regardless how we mark inner braces here, we will overwrite the + // BlockKind later if we parse a braced list (where all blocks + // inside are by default braced lists), or when we explicitly detect + // blocks (for example while parsing lambdas). + // + // We exclude + and - as they can be ObjC visibility modifiers. + ProbablyBracedList = + NextTok->isOneOf(tok::comma, tok::period, tok::colon, + tok::r_paren, tok::r_square, tok::l_brace, + tok::l_square, tok::l_paren, tok::ellipsis) || + (NextTok->is(tok::semi) && + (!ExpectClassBody || LBraceStack.size() != 1)) || + (NextTok->isBinaryOperator() && !NextIsObjCMethod); + } + if (ProbablyBracedList) { + Tok->BlockKind = BK_BracedInit; + LBraceStack.back()->BlockKind = BK_BracedInit; + } else { + Tok->BlockKind = BK_Block; + LBraceStack.back()->BlockKind = BK_Block; } - LBraceStack.pop_back(); } + LBraceStack.pop_back(); break; case tok::at: case tok::semi: @@ -381,14 +388,16 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { case tok::kw_switch: case tok::kw_try: case tok::kw___try: - if (!LBraceStack.empty()) + if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) LBraceStack.back()->BlockKind = BK_Block; break; default: break; } + PrevTok = Tok; Tok = NextTok; } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); + // Assume other blocks for all unclosed opening braces. for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { if (LBraceStack[i]->BlockKind == BK_Unknown) @@ -841,6 +850,8 @@ void UnwrappedLineParser::parseStructuralElement() { // This does not apply for Java and JavaScript. if (Style.Language == FormatStyle::LK_Java || Style.Language == FormatStyle::LK_JavaScript) { + if (FormatTok->is(tok::semi)) + nextToken(); addUnwrappedLine(); return; } @@ -986,13 +997,11 @@ bool UnwrappedLineParser::tryToParseLambda() { nextToken(); return false; } - // FIXME: This is a dirty way to access the previous token. Find a better - // solution. 
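The TypeScript handling above treats a brace after a colon as a braced initializer because type member lists separate members with semicolons. A sketch of an input that exercises it, fed through the same public API as in the earlier sketch:

    llvm::StringRef Code = "let x: {a: string; b: number;} = {a: 'a', b: 1};\n";
    clang::format::FormatStyle JS =
        clang::format::getGoogleStyle(clang::format::FormatStyle::LK_JavaScript);
    clang::tooling::Replacements Rs = clang::format::reformat(
        JS, Code, {clang::tooling::Range(0, Code.size())});
    // Without the colon/semicolon special cases, the semicolons inside the
    // type literal would previously trip the brace classifier's error recovery.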
- if (!Line->Tokens.empty() && - (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator, - tok::kw_new, tok::kw_delete) || - Line->Tokens.back().Tok->closesScope() || - Line->Tokens.back().Tok->isSimpleTypeSpecifier())) { + const FormatToken* Previous = getPreviousToken(); + if (Previous && + (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, + tok::kw_delete) || + Previous->closesScope() || Previous->isSimpleTypeSpecifier())) { nextToken(); return false; } @@ -1174,6 +1183,14 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { nextToken(); return !HasError; case tok::semi: + // JavaScript (or more precisely TypeScript) can have semicolons in braced + // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be + // used for error recovery if we have otherwise determined that this is + // a braced list. + if (Style.Language == FormatStyle::LK_JavaScript) { + nextToken(); + break; + } HasError = true; if (!ContinueOnSemicolons) return !HasError; @@ -1792,18 +1809,22 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { return; } + // Consume the "abstract" in "export abstract class". + if (FormatTok->is(Keywords.kw_abstract)) + nextToken(); + if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum, - Keywords.kw_let, Keywords.kw_var)) + Keywords.kw_interface, Keywords.kw_let, + Keywords.kw_var)) return; // Fall through to parsing the corresponding structure. - if (FormatTok->is(tok::l_brace)) { - FormatTok->BlockKind = BK_Block; - parseBracedList(); - } - - while (!eof() && FormatTok->isNot(tok::semi) && - FormatTok->isNot(tok::l_brace)) { - nextToken(); + while (!eof() && FormatTok->isNot(tok::semi)) { + if (FormatTok->is(tok::l_brace)) { + FormatTok->BlockKind = BK_Block; + parseBracedList(); + } else { + nextToken(); + } } } @@ -1877,6 +1898,14 @@ void UnwrappedLineParser::nextToken() { readToken(); } +const FormatToken *UnwrappedLineParser::getPreviousToken() { + // FIXME: This is a dirty way to access the previous token. Find a better + // solution. 
+ if (!Line || Line->Tokens.empty()) + return nullptr; + return Line->Tokens.back().Tok; +} + void UnwrappedLineParser::readToken() { bool CommentsInCurrentLine = true; do { diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h index a13c03f..6d40ab4 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h @@ -110,6 +110,7 @@ private: void addUnwrappedLine(); bool eof() const; void nextToken(); + const FormatToken *getPreviousToken(); void readToken(); void flushComments(bool NewlineBeforeNext); void pushToken(FormatToken *Tok); diff --git a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp index 725f05b..d6e6ed2 100644 --- a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp +++ b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp @@ -30,7 +30,7 @@ WhitespaceManager::Change::Change( unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective, - bool IsStartOfDeclName) + bool IsStartOfDeclName, bool IsInsideToken) : CreateReplacement(CreateReplacement), OriginalWhitespaceRange(OriginalWhitespaceRange), StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore), @@ -38,8 +38,8 @@ WhitespaceManager::Change::Change( CurrentLinePrefix(CurrentLinePrefix), Kind(Kind), ContinuesPPDirective(ContinuesPPDirective), IsStartOfDeclName(IsStartOfDeclName), IndentLevel(IndentLevel), - Spaces(Spaces), IsTrailingComment(false), TokenLength(0), - PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), + Spaces(Spaces), IsInsideToken(IsInsideToken), IsTrailingComment(false), + TokenLength(0), PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), StartOfBlockComment(nullptr), IndentationOffset(0) {} void WhitespaceManager::reset() { @@ -55,20 +55,23 @@ void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines, return; Tok.Decision = (Newlines > 0) ? 
FD_Break : FD_Continue; Changes.push_back( - Change(true, Tok.WhitespaceRange, IndentLevel, Spaces, StartOfTokenColumn, - Newlines, "", "", Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName))); + Change(/*CreateReplacement=*/true, Tok.WhitespaceRange, IndentLevel, + Spaces, StartOfTokenColumn, Newlines, "", "", Tok.Tok.getKind(), + InPPDirective && !Tok.IsFirst, + Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), + /*IsInsideToken=*/false)); } void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, bool InPPDirective) { if (Tok.Finalized) return; - Changes.push_back( - Change(false, Tok.WhitespaceRange, /*IndentLevel=*/0, - /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "", - Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName))); + Changes.push_back(Change( + /*CreateReplacement=*/false, Tok.WhitespaceRange, /*IndentLevel=*/0, + /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "", + Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst, + Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), + /*IsInsideToken=*/false)); } void WhitespaceManager::replaceWhitespaceInToken( @@ -81,15 +84,10 @@ void WhitespaceManager::replaceWhitespaceInToken( Changes.push_back(Change( true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix, - CurrentPrefix, - // If we don't add a newline this change doesn't start a comment. Thus, - // when we align line comments, we don't need to treat this change as one. - // FIXME: We still need to take this change in account to properly - // calculate the new length of the comment and to calculate the changes - // for which to do the alignment when aligning comments. - Tok.is(TT_LineComment) && Newlines > 0 ? tok::comment : tok::unknown, + CurrentPrefix, Tok.is(TT_LineComment) ? tok::comment : tok::unknown, InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName))); + Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), + /*IsInsideToken=*/Newlines == 0)); } const tooling::Replacements &WhitespaceManager::generateReplacements() { @@ -109,6 +107,7 @@ const tooling::Replacements &WhitespaceManager::generateReplacements() { void WhitespaceManager::calculateLineBreakInformation() { Changes[0].PreviousEndOfTokenColumn = 0; + Change *LastOutsideTokenChange = &Changes[0]; for (unsigned i = 1, e = Changes.size(); i != e; ++i) { unsigned OriginalWhitespaceStart = SourceMgr.getFileOffset(Changes[i].OriginalWhitespaceRange.getBegin()); @@ -119,11 +118,20 @@ void WhitespaceManager::calculateLineBreakInformation() { Changes[i].PreviousLinePostfix.size() + Changes[i - 1].CurrentLinePrefix.size(); + // If there are multiple changes in this token, sum up all the changes until + // the end of the line. 
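A simplified restatement of the bookkeeping that follows: a change flagged as inside a token donates its length (and spaces) to the last change that started outside a token, so a token split across several changes measures as one unit.

    #include <vector>
    struct Change { bool IsInsideToken; int TokenLength; int Spaces; };
    static void foldInsideTokenLengths(std::vector<Change> &Changes) {
      if (Changes.empty())
        return;
      Change *LastOutside = &Changes[0];
      for (size_t i = 1; i < Changes.size(); ++i) {
        if (Changes[i - 1].IsInsideToken)
          LastOutside->TokenLength +=
              Changes[i - 1].TokenLength + Changes[i - 1].Spaces;
        else
          LastOutside = &Changes[i - 1];
      }
    }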
+ if (Changes[i - 1].IsInsideToken) + LastOutsideTokenChange->TokenLength += + Changes[i - 1].TokenLength + Changes[i - 1].Spaces; + else + LastOutsideTokenChange = &Changes[i - 1]; + Changes[i].PreviousEndOfTokenColumn = Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength; Changes[i - 1].IsTrailingComment = - (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof) && + (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof || + (Changes[i].IsInsideToken && Changes[i].Kind == tok::comment)) && Changes[i - 1].Kind == tok::comment; } // FIXME: The last token is currently not always an eof token; in those @@ -133,6 +141,10 @@ void WhitespaceManager::calculateLineBreakInformation() { const WhitespaceManager::Change *LastBlockComment = nullptr; for (auto &Change : Changes) { + // Reset the IsTrailingComment flag for changes inside of trailing comments + // so they don't get realigned later. + if (Change.IsInsideToken) + Change.IsTrailingComment = false; Change.StartOfBlockComment = nullptr; Change.IndentationOffset = 0; if (Change.Kind == tok::comment) { @@ -342,6 +354,12 @@ void WhitespaceManager::alignTrailingComments() { unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn; unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength; + + // If we don't create a replacement for this change, we have to consider + // it to be immovable. + if (!Changes[i].CreateReplacement) + ChangeMaxColumn = ChangeMinColumn; + if (i + 1 != e && Changes[i + 1].ContinuesPPDirective) ChangeMaxColumn -= 2; // If this comment follows an } in column 0, it probably documents the diff --git a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h index f83971b..9ca9db6 100644 --- a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h +++ b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h @@ -109,7 +109,8 @@ public: unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, tok::TokenKind Kind, - bool ContinuesPPDirective, bool IsStartOfDeclName); + bool ContinuesPPDirective, bool IsStartOfDeclName, + bool IsInsideToken); bool CreateReplacement; // Changes might be in the middle of a token, so we cannot just keep the @@ -139,6 +140,10 @@ public: // comments. Uncompensated negative offset is truncated to 0. int Spaces; + // If this change is inside of a token but not at the start of the token or + // directly after a newline. + bool IsInsideToken; + // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and // \c EscapedNewlineColumn will be calculated in // \c calculateLineBreakInformation. diff --git a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp index 3a32f47..237a447 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp @@ -1,4 +1,4 @@ -//===--- +//===--- CompilerInvocation.cpp -------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -2202,8 +2202,11 @@ std::string CompilerInvocation::getModuleHash() const { code = hash_combine(code, I->first, I->second); } - // Extend the signature with the sysroot. - code = hash_combine(code, hsOpts.Sysroot, hsOpts.UseBuiltinIncludes, + // Extend the signature with the sysroot and other header search options. 
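llvm::hash_combine folds an arbitrary number of hashable values into one hash_code, which is why widening the module signature is a matter of listing more fields. A standalone illustration (field names mirror the options hashed below; sketch only):

    #include "llvm/ADT/Hashing.h"
    #include "llvm/ADT/StringRef.h"
    static llvm::hash_code hashHeaderSearchBits(llvm::StringRef Sysroot,
                                                llvm::StringRef ModuleFormat,
                                                bool UseDebugInfo,
                                                bool UseBuiltinIncludes) {
      return llvm::hash_combine(Sysroot, ModuleFormat, UseDebugInfo,
                                UseBuiltinIncludes);
    }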
+ code = hash_combine(code, hsOpts.Sysroot, + hsOpts.ModuleFormat, + hsOpts.UseDebugInfo, + hsOpts.UseBuiltinIncludes, hsOpts.UseStandardSystemIncludes, hsOpts.UseStandardCXXIncludes, hsOpts.UseLibcxx); diff --git a/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp b/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp index d6c88d2..407ccea 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp @@ -187,15 +187,17 @@ collectModuleHeaderIncludes(const LangOptions &LangOpts, FileManager &FileMgr, return std::error_code(); // Add includes for each of these headers. - for (Module::Header &H : Module->Headers[Module::HK_Normal]) { - Module->addTopHeader(H.Entry); - // Use the path as specified in the module map file. We'll look for this - // file relative to the module build directory (the directory containing - // the module map file) so this will find the same file that we found - // while parsing the module map. - if (std::error_code Err = addHeaderInclude(H.NameAsWritten, Includes, - LangOpts, Module->IsExternC)) - return Err; + for (auto HK : {Module::HK_Normal, Module::HK_Private}) { + for (Module::Header &H : Module->Headers[HK]) { + Module->addTopHeader(H.Entry); + // Use the path as specified in the module map file. We'll look for this + // file relative to the module build directory (the directory containing + // the module map file) so this will find the same file that we found + // while parsing the module map. + if (std::error_code Err = addHeaderInclude(H.NameAsWritten, Includes, + LangOpts, Module->IsExternC)) + return Err; + } } // Note that Module->PrivateHeaders will not be a TopHeader. diff --git a/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp b/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp index 12c8524..f8b73e9 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp @@ -119,6 +119,7 @@ public: const FunctionDecl *Delete) override; void CompletedImplicitDefinition(const FunctionDecl *D) override; void StaticDataMemberInstantiated(const VarDecl *D) override; + void DefaultArgumentInstantiated(const ParmVarDecl *D) override; void AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD, const ObjCInterfaceDecl *IFD) override; void FunctionDefinitionInstantiated(const FunctionDecl *D) override; @@ -193,6 +194,11 @@ void MultiplexASTMutationListener::StaticDataMemberInstantiated( for (size_t i = 0, e = Listeners.size(); i != e; ++i) Listeners[i]->StaticDataMemberInstantiated(D); } +void MultiplexASTMutationListener::DefaultArgumentInstantiated( + const ParmVarDecl *D) { + for (size_t i = 0, e = Listeners.size(); i != e; ++i) + Listeners[i]->DefaultArgumentInstantiated(D); +} void MultiplexASTMutationListener::AddedObjCCategoryToInterface( const ObjCCategoryDecl *CatD, const ObjCInterfaceDecl *IFD) { diff --git a/contrib/llvm/tools/clang/lib/Headers/altivec.h b/contrib/llvm/tools/clang/lib/Headers/altivec.h index dc0dcbc..a5b4f74 100644 --- a/contrib/llvm/tools/clang/lib/Headers/altivec.h +++ b/contrib/llvm/tools/clang/lib/Headers/altivec.h @@ -1891,6 +1891,22 @@ static vector float __ATTRS_o_ai vec_ctf(vector unsigned int __a, int __b) { return __builtin_altivec_vcfux((vector int)__a, __b); } +#ifdef __VSX__ +static vector double __ATTRS_o_ai vec_ctf(vector unsigned long long __a, + int __b) { + vector double __ret = __builtin_convertvector(__a, vector double); + __ret *= 
(vector double)(vector unsigned long long)((0x3ffULL - __b) << 52); + return __ret; +} + +static vector double __ATTRS_o_ai vec_ctf(vector signed long long __a, + int __b) { + vector double __ret = __builtin_convertvector(__a, vector double); + __ret *= (vector double)(vector unsigned long long)((0x3ffULL - __b) << 52); + return __ret; +} +#endif + /* vec_vcfsx */ static vector float __attribute__((__always_inline__)) @@ -1907,11 +1923,18 @@ vec_vcfux(vector unsigned int __a, int __b) { /* vec_cts */ -static vector int __attribute__((__always_inline__)) -vec_cts(vector float __a, int __b) { +static vector int __ATTRS_o_ai vec_cts(vector float __a, int __b) { return __builtin_altivec_vctsxs(__a, __b); } +#ifdef __VSX__ +static vector signed long long __ATTRS_o_ai vec_cts(vector double __a, + int __b) { + __a *= (vector double)(vector unsigned long long)((0x3ffULL + __b) << 52); + return __builtin_convertvector(__a, vector signed long long); +} +#endif + /* vec_vctsxs */ static vector int __attribute__((__always_inline__)) @@ -1921,11 +1944,18 @@ vec_vctsxs(vector float __a, int __b) { /* vec_ctu */ -static vector unsigned int __attribute__((__always_inline__)) -vec_ctu(vector float __a, int __b) { +static vector unsigned int __ATTRS_o_ai vec_ctu(vector float __a, int __b) { return __builtin_altivec_vctuxs(__a, __b); } +#ifdef __VSX__ +static vector unsigned long long __ATTRS_o_ai vec_ctu(vector double __a, + int __b) { + __a *= (vector double)(vector unsigned long long)((0x3ffULL + __b) << 52); + return __builtin_convertvector(__a, vector unsigned long long); +} +#endif + /* vec_vctuxs */ static vector unsigned int __attribute__((__always_inline__)) diff --git a/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp b/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp index 8a686a7..2d005dd 100644 --- a/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp @@ -153,8 +153,7 @@ std::string HeaderSearch::getModuleFileName(StringRef ModuleName, auto FileName = llvm::sys::path::filename(ModuleMapPath); llvm::hash_code Hash = - llvm::hash_combine(DirName.lower(), FileName.lower(), - HSOpts->ModuleFormat, HSOpts->UseDebugInfo); + llvm::hash_combine(DirName.lower(), FileName.lower()); SmallString<128> HashStr; llvm::APInt(64, size_t(Hash)).toStringUnsigned(HashStr, /*Radix*/36); diff --git a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp index 1e7858a..5b1c493 100644 --- a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp @@ -983,6 +983,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { /// u' c-char-sequence ' /// U' c-char-sequence ' /// L' c-char-sequence ' +/// u8' c-char-sequence ' [C++1z lex.ccon] /// c-char-sequence: /// c-char /// c-char-sequence c-char diff --git a/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp b/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp index 3134790..afb41a2 100644 --- a/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp @@ -876,6 +876,22 @@ struct PragmaDebugHandler : public PragmaHandler { Crasher.setKind(tok::annot_pragma_parser_crash); Crasher.setAnnotationRange(SourceRange(Tok.getLocation())); PP.EnterToken(Crasher); + } else if (II->isStr("dump")) { + Token Identifier; + PP.LexUnexpandedToken(Identifier); + if (auto *DumpII = Identifier.getIdentifierInfo()) { + Token DumpAnnot; + DumpAnnot.startToken(); + DumpAnnot.setKind(tok::annot_pragma_dump); 
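On the VSX conversions above: (0x3ffULL - __b) << 52 builds the IEEE-754 double 2^-__b directly, since a double with zero mantissa and biased exponent e encodes 2^(e-1023); multiplying by it applies the fixed-point scaling without a division (vec_cts/vec_ctu use 0x3ffULL + __b for 2^__b). A scalar sketch of the same trick:

    #include <cstring>
    static double powerOfTwo(int NegExp) {          // returns 2^(-NegExp)
      unsigned long long Bits = (0x3ffULL - NegExp) << 52;
      double D;
      std::memcpy(&D, &Bits, sizeof D);             // bit-cast into the exponent field
      return D;
    }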
+ DumpAnnot.setAnnotationRange( + SourceRange(Tok.getLocation(), Identifier.getLocation())); + DumpAnnot.setAnnotationValue(DumpII); + PP.DiscardUntilEndOfDirective(); + PP.EnterToken(DumpAnnot); + } else { + PP.Diag(Identifier, diag::warn_pragma_debug_missing_argument) + << II->getName(); + } } else if (II->isStr("llvm_fatal_error")) { llvm::report_fatal_error("#pragma clang __debug llvm_fatal_error"); } else if (II->isStr("llvm_unreachable")) { @@ -887,7 +903,8 @@ struct PragmaDebugHandler : public PragmaHandler { if (MacroII) PP.dumpMacroInfo(MacroII); else - PP.Diag(MacroName, diag::warn_pragma_diagnostic_invalid); + PP.Diag(MacroName, diag::warn_pragma_debug_missing_argument) + << II->getName(); } else if (II->isStr("overflow_stack")) { DebugOverflowStack(); } else if (II->isStr("handle_crash")) { diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp index e69bb27..c64b97d 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp @@ -3326,6 +3326,15 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, case tok::kw___bool: isInvalid = DS.SetTypeAltiVecBool(true, Loc, PrevSpec, DiagID, Policy); break; + case tok::kw_pipe: + if (!getLangOpts().OpenCL || (getLangOpts().OpenCLVersion < 200)) { + // OpenCL 2.0 defined this keyword. OpenCL 1.2 and earlier should + // support the "pipe" word as identifier. + Tok.getIdentifierInfo()->revertTokenIDToIdentifier(); + goto DoneWithDeclSpec; + } + isInvalid = DS.SetTypePipe(true, Loc, PrevSpec, DiagID, Policy); + break; case tok::kw___unknown_anytype: isInvalid = DS.SetTypeSpecType(TST_unknown_anytype, Loc, PrevSpec, DiagID, Policy); @@ -4401,6 +4410,9 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { switch (Tok.getKind()) { default: return false; + case tok::kw_pipe: + return getLangOpts().OpenCL && (getLangOpts().OpenCLVersion >= 200); + case tok::identifier: // foo::bar // Unfortunate hack to support "Class.factoryMethod" notation. if (getLangOpts().ObjC1 && NextToken().is(tok::period)) @@ -4847,6 +4859,9 @@ static bool isPtrOperatorToken(tok::TokenKind Kind, const LangOptions &Lang, if (Kind == tok::star || Kind == tok::caret) return true; + if ((Kind == tok::kw_pipe) && Lang.OpenCL && (Lang.OpenCLVersion >= 200)) + return true; + if (!Lang.CPlusPlus) return false; @@ -4865,6 +4880,17 @@ static bool isPtrOperatorToken(tok::TokenKind Kind, const LangOptions &Lang, return false; } +// Indicates whether the given declarator is a pipe declarator. +static bool isPipeDeclerator(const Declarator &D) { + const unsigned NumTypes = D.getNumTypeObjects(); + + for (unsigned Idx = 0; Idx != NumTypes; ++Idx) + if (DeclaratorChunk::Pipe == D.getTypeObject(Idx).Kind) + return true; + + return false; +} + /// ParseDeclaratorInternal - Parse a C or C++ declarator. The direct-declarator /// is parsed by the function passed to it. Pass null, and the direct-declarator /// isn't parsed at all, making this function effectively parse the C++ @@ -4941,6 +4967,15 @@ void Parser::ParseDeclaratorInternal(Declarator &D, } tok::TokenKind Kind = Tok.getKind(); + + if (D.getDeclSpec().isTypeSpecPipe() && !isPipeDeclerator(D)) { + DeclSpec &DS = D.getMutableDeclSpec(); + + D.AddTypeInfo( + DeclaratorChunk::getPipe(DS.getTypeQualifiers(), DS.getPipeLoc()), + DS.getAttributes(), SourceLocation()); + } + // Not a pointer, C++ reference, or block. 
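The user-side view of the handler assembled above (sketch; the pragma takes one identifier, and a missing one produces the new warn_pragma_debug_missing_argument):

    int interesting_decl;
    #pragma clang __debug dump interesting_decl  // dumps what name lookup finds
    #pragma clang __debug dump                   // missing identifier: warned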
if (!isPtrOperatorToken(Kind, getLangOpts(), D.getContext())) { if (DirectDeclParser) @@ -6092,6 +6127,7 @@ void Parser::ParseMisplacedBracketDeclarator(Declarator &D) { case DeclaratorChunk::Reference: case DeclaratorChunk::BlockPointer: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: NeedParens = true; break; case DeclaratorChunk::Array: diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp index a4de975..3f22ad4 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp @@ -3363,7 +3363,8 @@ Parser::tryParseExceptionSpecification(bool Delayed, ConsumeAndStoreUntil(tok::r_paren, *ExceptionSpecTokens, /*StopAtSemi=*/true, /*ConsumeFinalToken=*/true); - SpecificationRange.setEnd(Tok.getLocation()); + SpecificationRange.setEnd(ExceptionSpecTokens->back().getLocation()); + return EST_Unparsed; } diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp index 078f4c3..a08db54 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp @@ -165,8 +165,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirective() { /// 'distribute' /// annot_pragma_openmp_end /// -StmtResult -Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) { +StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( + AllowedContsructsKind Allowed) { assert(Tok.is(tok::annot_pragma_openmp) && "Not an OpenMP directive!"); ParenBraceBracketBalancer BalancerRAIIObj(*this); SmallVector<Expr *, 5> Identifiers; @@ -186,6 +186,10 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) { switch (DKind) { case OMPD_threadprivate: + if (Allowed != ACK_Any) { + Diag(Tok, diag::err_omp_immediate_directive) + << getOpenMPDirectiveName(DKind) << 0; + } ConsumeToken(); if (!ParseOpenMPSimpleVarList(OMPD_threadprivate, Identifiers, false)) { // The last seen token is annot_pragma_openmp_end - need to check for @@ -213,7 +217,7 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) { case OMPD_taskwait: case OMPD_cancellation_point: case OMPD_cancel: - if (!StandAloneAllowed) { + if (Allowed == ACK_StatementsOpenMPNonStandalone) { Diag(Tok, diag::err_omp_immediate_directive) << getOpenMPDirectiveName(DKind) << 0; } @@ -299,7 +303,7 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) { // If the depend clause is specified, the ordered construct is a stand-alone // directive. 
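How the keyword behaves after the pipe parsing changes above, in illustrative OpenCL C: under -cl-std=CL2.0 it is a type specifier and declarator chunk; under CL1.2 and earlier the token is reverted to a plain identifier.

    // -cl-std=CL2.0: 'pipe' forms a pipe type (packet type must be a value type).
    kernel void consume(read_only pipe int p);
    // -cl-std=CL1.2 and earlier: the same token is an ordinary identifier again.
    int pipe;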
if (DKind == OMPD_ordered && FirstClauses[OMPC_depend].getInt()) { - if (!StandAloneAllowed) { + if (Allowed == ACK_StatementsOpenMPNonStandalone) { Diag(Loc, diag::err_omp_immediate_directive) << getOpenMPDirectiveName(DKind) << 1 << getOpenMPClauseName(OMPC_depend); diff --git a/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp b/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp index 4430eb8..bc70942 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp @@ -377,6 +377,14 @@ void Parser::HandlePragmaAlign() { Actions.ActOnPragmaOptionsAlign(Kind, PragmaLoc); } +void Parser::HandlePragmaDump() { + assert(Tok.is(tok::annot_pragma_dump)); + IdentifierInfo *II = + reinterpret_cast<IdentifierInfo *>(Tok.getAnnotationValue()); + Actions.ActOnPragmaDump(getCurScope(), Tok.getLocation(), II); + ConsumeToken(); +} + void Parser::HandlePragmaWeak() { assert(Tok.is(tok::annot_pragma_weak)); SourceLocation PragmaLoc = ConsumeToken(); diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp index 717bcff..edf0dda 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp @@ -32,14 +32,18 @@ using namespace clang; /// \brief Parse a standalone statement (for instance, as the body of an 'if', /// 'while', or 'for'). -StmtResult Parser::ParseStatement(SourceLocation *TrailingElseLoc) { +StmtResult Parser::ParseStatement(SourceLocation *TrailingElseLoc, + bool AllowOpenMPStandalone) { StmtResult Res; // We may get back a null statement if we found a #pragma. Keep going until // we get an actual statement. do { StmtVector Stmts; - Res = ParseStatementOrDeclaration(Stmts, true, TrailingElseLoc); + Res = ParseStatementOrDeclaration( + Stmts, AllowOpenMPStandalone ? 
ACK_StatementsOpenMPAnyExecutable + : ACK_StatementsOpenMPNonStandalone, + TrailingElseLoc); } while (!Res.isInvalid() && !Res.get()); return Res; @@ -95,7 +99,8 @@ StmtResult Parser::ParseStatement(SourceLocation *TrailingElseLoc) { /// [OBC] '@' 'throw' ';' /// StmtResult -Parser::ParseStatementOrDeclaration(StmtVector &Stmts, bool OnlyStatement, +Parser::ParseStatementOrDeclaration(StmtVector &Stmts, + AllowedContsructsKind Allowed, SourceLocation *TrailingElseLoc) { ParenBraceBracketBalancer BalancerRAIIObj(*this); @@ -103,8 +108,8 @@ Parser::ParseStatementOrDeclaration(StmtVector &Stmts, bool OnlyStatement, ParsedAttributesWithRange Attrs(AttrFactory); MaybeParseCXX11Attributes(Attrs, nullptr, /*MightBeObjCMessageSend*/ true); - StmtResult Res = ParseStatementOrDeclarationAfterAttributes(Stmts, - OnlyStatement, TrailingElseLoc, Attrs); + StmtResult Res = ParseStatementOrDeclarationAfterAttributes( + Stmts, Allowed, TrailingElseLoc, Attrs); assert((Attrs.empty() || Res.isInvalid() || Res.isUsable()) && "attributes on empty statement"); @@ -146,7 +151,7 @@ private: StmtResult Parser::ParseStatementOrDeclarationAfterAttributes(StmtVector &Stmts, - bool OnlyStatement, SourceLocation *TrailingElseLoc, + AllowedContsructsKind Allowed, SourceLocation *TrailingElseLoc, ParsedAttributesWithRange &Attrs) { const char *SemiError = nullptr; StmtResult Res; @@ -202,7 +207,8 @@ Retry: } default: { - if ((getLangOpts().CPlusPlus || !OnlyStatement) && isDeclarationStatement()) { + if ((getLangOpts().CPlusPlus || Allowed == ACK_Any) && + isDeclarationStatement()) { SourceLocation DeclStart = Tok.getLocation(), DeclEnd; DeclGroupPtrTy Decl = ParseDeclaration(Declarator::BlockContext, DeclEnd, Attrs); @@ -346,7 +352,7 @@ Retry: case tok::annot_pragma_openmp: ProhibitAttributes(Attrs); - return ParseOpenMPDeclarativeOrExecutableDirective(!OnlyStatement); + return ParseOpenMPDeclarativeOrExecutableDirective(Allowed); case tok::annot_pragma_ms_pointers_to_members: ProhibitAttributes(Attrs); @@ -365,7 +371,11 @@ Retry: case tok::annot_pragma_loop_hint: ProhibitAttributes(Attrs); - return ParsePragmaLoopHint(Stmts, OnlyStatement, TrailingElseLoc, Attrs); + return ParsePragmaLoopHint(Stmts, Allowed, TrailingElseLoc, Attrs); + + case tok::annot_pragma_dump: + HandlePragmaDump(); + return StmtEmpty(); } // If we reached this code, the statement must end in a semicolon. @@ -583,7 +593,8 @@ StmtResult Parser::ParseLabeledStatement(ParsedAttributesWithRange &attrs) { // can't handle GNU attributes), so only call it in the one case where // GNU attributes are allowed. SubStmt = ParseStatementOrDeclarationAfterAttributes( - Stmts, /*OnlyStmts*/ true, nullptr, TempAttrs); + Stmts, /*Allowed=*/ACK_StatementsOpenMPNonStandalone, nullptr, + TempAttrs); if (!TempAttrs.empty() && !SubStmt.isInvalid()) SubStmt = Actions.ProcessStmtAttributes( SubStmt.get(), TempAttrs.getList(), TempAttrs.Range); @@ -722,7 +733,8 @@ StmtResult Parser::ParseCaseStatement(bool MissingCase, ExprResult Expr) { // continue parsing the sub-stmt. if (Case.isInvalid()) { if (TopLevelCase.isInvalid()) // No parsed case stmts. - return ParseStatement(); + return ParseStatement(/*TrailingElseLoc=*/nullptr, + /*AllowOpenMPStandalone=*/true); // Otherwise, just don't add it as a nested case. } else { // If this is the first case statement we parsed, it becomes TopLevelCase. 
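What the AllowedContsructsKind threading above distinguishes, in user code: stand-alone directives remain invalid as the immediate substatement of an if or a loop, but per the case/default changes below they are now accepted directly after a label (a sketch; the first pragma triggers err_omp_immediate_directive):

    void sync(bool b, int phase) {
      if (b)
    #pragma omp barrier   // rejected: stand-alone directive as immediate substatement
        ;
      switch (phase) {
      case 0:
    #pragma omp barrier   // now parsed: label bodies allow stand-alone directives
        break;
      default:
        break;
      }
    }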
@@ -742,7 +754,8 @@ StmtResult Parser::ParseCaseStatement(bool MissingCase, ExprResult Expr) { StmtResult SubStmt; if (Tok.isNot(tok::r_brace)) { - SubStmt = ParseStatement(); + SubStmt = ParseStatement(/*TrailingElseLoc=*/nullptr, + /*AllowOpenMPStandalone=*/true); } else { // Nicely diagnose the common error "switch (X) { case 4: }", which is // not valid. If ColonLoc doesn't point to a valid text location, there was @@ -794,7 +807,8 @@ StmtResult Parser::ParseDefaultStatement() { StmtResult SubStmt; if (Tok.isNot(tok::r_brace)) { - SubStmt = ParseStatement(); + SubStmt = ParseStatement(/*TrailingElseLoc=*/nullptr, + /*AllowOpenMPStandalone=*/true); } else { // Diagnose the common error "switch (X) {... default: }", which is // not valid. @@ -893,6 +907,9 @@ void Parser::ParseCompoundStatementLeadingPragmas() { case tok::annot_pragma_ms_vtordisp: HandlePragmaMSVtorDisp(); break; + case tok::annot_pragma_dump: + HandlePragmaDump(); + break; default: checkForPragmas = false; break; @@ -965,7 +982,7 @@ StmtResult Parser::ParseCompoundStatementBody(bool isStmtExpr) { StmtResult R; if (Tok.isNot(tok::kw___extension__)) { - R = ParseStatementOrDeclaration(Stmts, false); + R = ParseStatementOrDeclaration(Stmts, ACK_Any); } else { // __extension__ can start declarations and it can also be a unary // operator for expressions. Consume multiple __extension__ markers here @@ -1861,7 +1878,8 @@ StmtResult Parser::ParseReturnStatement() { return Actions.ActOnReturnStmt(ReturnLoc, R.get(), getCurScope()); } -StmtResult Parser::ParsePragmaLoopHint(StmtVector &Stmts, bool OnlyStatement, +StmtResult Parser::ParsePragmaLoopHint(StmtVector &Stmts, + AllowedContsructsKind Allowed, SourceLocation *TrailingElseLoc, ParsedAttributesWithRange &Attrs) { // Create temporary attribute list. @@ -1884,7 +1902,7 @@ StmtResult Parser::ParsePragmaLoopHint(StmtVector &Stmts, bool OnlyStatement, MaybeParseCXX11Attributes(Attrs); StmtResult S = ParseStatementOrDeclarationAfterAttributes( - Stmts, OnlyStatement, TrailingElseLoc, Attrs); + Stmts, Allowed, TrailingElseLoc, Attrs); Attrs.takeAllFrom(TempAttrs); return S; @@ -2182,7 +2200,7 @@ void Parser::ParseMicrosoftIfExistsStatement(StmtVector &Stmts) { // Condition is true, parse the statements. while (Tok.isNot(tok::r_brace)) { - StmtResult R = ParseStatementOrDeclaration(Stmts, false); + StmtResult R = ParseStatementOrDeclaration(Stmts, ACK_Any); if (R.isUsable()) Stmts.push_back(R.get()); } diff --git a/contrib/llvm/tools/clang/lib/Parse/Parser.cpp b/contrib/llvm/tools/clang/lib/Parse/Parser.cpp index b3eeb9d..ccefb3d 100644 --- a/contrib/llvm/tools/clang/lib/Parse/Parser.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/Parser.cpp @@ -668,6 +668,9 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs, case tok::annot_pragma_ms_pragma: HandlePragmaMSPragma(); return DeclGroupPtrTy(); + case tok::annot_pragma_dump: + HandlePragmaDump(); + return DeclGroupPtrTy(); case tok::semi: // Either a C++11 empty-declaration or attribute-declaration. 
SingleDecl = Actions.ActOnEmptyDeclaration(getCurScope(), diff --git a/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp b/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp index d664d87..6f6c4ca 100644 --- a/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp @@ -270,6 +270,7 @@ bool Declarator::isDeclarationOfFunction() const { case DeclaratorChunk::Array: case DeclaratorChunk::BlockPointer: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: return false; } llvm_unreachable("Invalid type chunk"); @@ -713,6 +714,22 @@ bool DeclSpec::SetTypeAltiVecVector(bool isAltiVecVector, SourceLocation Loc, return false; } +bool DeclSpec::SetTypePipe(bool isPipe, SourceLocation Loc, + const char *&PrevSpec, unsigned &DiagID, + const PrintingPolicy &Policy) { + + if (TypeSpecType != TST_unspecified) { + PrevSpec = DeclSpec::getSpecifierName((TST)TypeSpecType, Policy); + DiagID = diag::err_invalid_decl_spec_combination; + return true; + } + + if (isPipe) { + TypeSpecPipe = TSP_pipe; + } + return false; +} + bool DeclSpec::SetTypeAltiVecPixel(bool isAltiVecPixel, SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID, const PrintingPolicy &Policy) { diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCast.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCast.cpp index 07b0589..ad1d7da 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaCast.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaCast.cpp @@ -2105,6 +2105,7 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle, && (SrcExpr.get()->getType()->isIntegerType() || SrcExpr.get()->getType()->isFloatingType())) { Kind = CK_VectorSplat; + SrcExpr = Self.prepareVectorSplat(DestType, SrcExpr.get()); return; } @@ -2339,6 +2340,7 @@ void CastOperation::CheckCStyleCast() { if (DestVecTy->getVectorKind() == VectorType::AltiVecVector && (SrcType->isIntegerType() || SrcType->isFloatingType())) { Kind = CK_VectorSplat; + SrcExpr = Self.prepareVectorSplat(DestType, SrcExpr.get()); } else if (Self.CheckVectorCast(OpRange, DestType, SrcType, Kind)) { SrcExpr = ExprError(); } diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp index cbdcb5e..6c2834b 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp @@ -6243,7 +6243,8 @@ static IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) { IntRange OutputTypeRange = IntRange::forValueOfType(C, GetExprType(CE)); - bool isIntegerCast = (CE->getCastKind() == CK_IntegralCast); + bool isIntegerCast = CE->getCastKind() == CK_IntegralCast || + CE->getCastKind() == CK_BooleanToSignedIntegral; // Assume that non-integer casts can span the full range of the type. if (!isIntegerCast) @@ -7047,6 +7048,10 @@ static void DiagnoseNullConversion(Sema &S, Expr *E, QualType T, E->getExprLoc())) return; + // Don't warn on functions which have return type nullptr_t. + if (isa<CallExpr>(E)) + return; + // Check for NULL (GNUNull) or nullptr (CXX11_nullptr). const Expr::NullPointerConstantKind NullKind = E->isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNotNull); @@ -7062,8 +7067,12 @@ static void DiagnoseNullConversion(Sema &S, Expr *E, QualType T, // __null is usually wrapped in a macro. Go up a macro if that is the case. 
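One plausible reading of the name check that follows (hedged; macro-location bookkeeping is subtle): the diagnostic location only hops out of a macro expansion when the immediate macro is literally spelled NULL, so a warning about a user macro that merely expands to __null stays attributed to that macro instead of drifting to an unrelated expansion site.

    // Illustrative only; exact attribution depends on the expansion chain.
    #define MY_SENTINEL NULL   // user macro wrapping NULL/__null
    int a = NULL;              // -Wconversion-null points at this NULL use
    int b = MY_SENTINEL;       // location is no longer unconditionally unwrapped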
if (NullKind == Expr::NPCK_GNUNull) { - if (Loc.isMacroID()) - Loc = S.SourceMgr.getImmediateExpansionRange(Loc).first; + if (Loc.isMacroID()) { + StringRef MacroName = + Lexer::getImmediateMacroName(Loc, S.SourceMgr, S.getLangOpts()); + if (MacroName == "NULL") + Loc = S.SourceMgr.getImmediateExpansionRange(Loc).first; + } } // Only warn if the null and context location are in the same macro expansion. @@ -7845,6 +7854,10 @@ void Sema::CheckBoolLikeConversion(Expr *E, SourceLocation CC) { void Sema::CheckForIntOverflow (Expr *E) { if (isa<BinaryOperator>(E->IgnoreParenCasts())) E->IgnoreParenCasts()->EvaluateForOverflow(Context); + else if (auto InitList = dyn_cast<InitListExpr>(E)) + for (Expr *E : InitList->inits()) + if (isa<BinaryOperator>(E->IgnoreParenCasts())) + E->IgnoreParenCasts()->EvaluateForOverflow(Context); } namespace { diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp index f27fb2b1..f95d106 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp @@ -3962,9 +3962,6 @@ static bool CheckAnonMemberRedeclaration(Sema &SemaRef, Sema::ForRedeclaration); if (!SemaRef.LookupName(R, S)) return false; - if (R.getAsSingle<TagDecl>()) - return false; - // Pick a representative declaration. NamedDecl *PrevDecl = R.getRepresentativeDecl()->getUnderlyingDecl(); assert(PrevDecl && "Expected a non-null Decl"); @@ -4675,11 +4672,13 @@ bool Sema::DiagnoseClassNameShadow(DeclContext *DC, DeclarationNameInfo NameInfo) { DeclarationName Name = NameInfo.getName(); - if (CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(DC)) - if (Record->getIdentifier() && Record->getDeclName() == Name) { - Diag(NameInfo.getLoc(), diag::err_member_name_of_class) << Name; - return true; - } + CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(DC); + while (Record && Record->isAnonymousStructOrUnion()) + Record = dyn_cast<CXXRecordDecl>(Record->getParent()); + if (Record && Record->getIdentifier() && Record->getDeclName() == Name) { + Diag(NameInfo.getLoc(), diag::err_member_name_of_class) << Name; + return true; + } return false; } @@ -8257,6 +8256,23 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, for (auto Param : NewFD->params()) checkIsValidOpenCLKernelParameter(*this, D, Param, ValidTypes); } + for (FunctionDecl::param_iterator PI = NewFD->param_begin(), + PE = NewFD->param_end(); PI != PE; ++PI) { + ParmVarDecl *Param = *PI; + QualType PT = Param->getType(); + + // OpenCL 2.0 pipe restrictions forbids pipe packet types to be non-value + // types. + if (getLangOpts().OpenCLVersion >= 200) { + if(const PipeType *PipeTy = PT->getAs<PipeType>()) { + QualType ElemTy = PipeTy->getElementType(); + if (ElemTy->isReferenceType() || ElemTy->isPointerType()) { + Diag(Param->getTypeSpecStartLoc(), diag::err_reference_pipe_type ); + D.setInvalidType(); + } + } + } + } MarkUnusedFileScopedDecl(NewFD); @@ -11799,6 +11815,28 @@ static bool isAcceptableTagRedeclContext(Sema &S, DeclContext *OldDC, return false; } +/// Find the DeclContext in which a tag is implicitly declared if we see an +/// elaborated type specifier in the specified context, and lookup finds +/// nothing. 
+static DeclContext *getTagInjectionContext(DeclContext *DC) { + while (!DC->isFileContext() && !DC->isFunctionOrMethod()) + DC = DC->getParent(); + return DC; +} + +/// Find the Scope in which a tag is implicitly declared if we see an +/// elaborated type specifier in the specified context, and lookup finds +/// nothing. +static Scope *getTagInjectionScope(Scope *S, const LangOptions &LangOpts) { + while (S->isClassScope() || + (LangOpts.CPlusPlus && + S->isFunctionPrototypeScope()) || + ((S->getFlags() & Scope::DeclScope) == 0) || + (S->getEntity() && S->getEntity()->isTransparentContext())) + S = S->getParent(); + return S; +} + /// \brief This is invoked when we see 'struct foo' or 'struct {'. In the /// former case, Name will be non-null. In the later case, Name will be null. /// TagSpec indicates what kind of tag this is. TUK indicates whether this is a @@ -12115,16 +12153,10 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, // Find the context where we'll be declaring the tag. // FIXME: We would like to maintain the current DeclContext as the // lexical context, - while (!SearchDC->isFileContext() && !SearchDC->isFunctionOrMethod()) - SearchDC = SearchDC->getParent(); + SearchDC = getTagInjectionContext(SearchDC); // Find the scope where we'll be declaring the tag. - while (S->isClassScope() || - (getLangOpts().CPlusPlus && - S->isFunctionPrototypeScope()) || - ((S->getFlags() & Scope::DeclScope) == 0) || - (S->getEntity() && S->getEntity()->isTransparentContext())) - S = S->getParent(); + S = getTagInjectionScope(S, getLangOpts()); } else { assert(TUK == TUK_Friend); // C++ [namespace.memdef]p3: @@ -12284,7 +12316,8 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, } else if (TUK == TUK_Reference && (PrevTagDecl->getFriendObjectKind() == Decl::FOK_Undeclared || - getOwningModule(PrevDecl) != + PP.getModuleContainingLocation( + PrevDecl->getLocation()) != PP.getModuleContainingLocation(KWLoc)) && SS.isEmpty()) { // This declaration is a reference to an existing entity, but @@ -12294,14 +12327,12 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, // the declaration would have meant the same thing if no prior // declaration were found, that is, if it was found in the same // scope where we would have injected a declaration. - DeclContext *InjectedDC = CurContext; - while (!InjectedDC->isFileContext() && - !InjectedDC->isFunctionOrMethod()) - InjectedDC = InjectedDC->getParent(); - if (!InjectedDC->getRedeclContext()->Equals( - PrevDecl->getDeclContext()->getRedeclContext())) + if (!getTagInjectionContext(CurContext)->getRedeclContext() + ->Equals(PrevDecl->getDeclContext()->getRedeclContext())) return PrevTagDecl; - // This is in the injected scope, create a new declaration. + // This is in the injected scope, create a new declaration in + // that scope. 
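The scenario the two helpers factor out, in user code: when an elaborated-type-specifier's lookup finds nothing, the tag is declared in the nearest enclosing scope that can actually hold it, skipping class and (in C++) function-prototype scopes.

    namespace N {
    void f(struct S *p); // 'S' is injected into namespace N,
                         // not into f's prototype scope
    struct S;            // redeclares that same N::S
    }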
+ S = getTagInjectionScope(S, getLangOpts()); } else { return PrevTagDecl; } @@ -12603,7 +12634,7 @@ CreateNewDecl: << Name; Invalid = true; } - } else { + } else if (!PrevDecl) { Diag(Loc, diag::warn_decl_in_param_list) << Context.getTagDeclType(New); } DeclsInPrototypeScope.push_back(New); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp index 5a0f0f8..f94c822 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp @@ -348,6 +348,25 @@ static void handleSimpleAttribute(Sema &S, Decl *D, Attr.getAttributeSpellingListIndex())); } +template <typename AttrType> +static void handleSimpleAttributeWithExclusions(Sema &S, Decl *D, + const AttributeList &Attr) { + handleSimpleAttribute<AttrType>(S, D, Attr); +} + +/// \brief Applies the given attribute to the Decl so long as the Decl doesn't +/// already have one of the given incompatible attributes. +template <typename AttrType, typename IncompatibleAttrType, + typename... IncompatibleAttrTypes> +static void handleSimpleAttributeWithExclusions(Sema &S, Decl *D, + const AttributeList &Attr) { + if (checkAttrMutualExclusion<IncompatibleAttrType>(S, D, Attr.getRange(), + Attr.getName())) + return; + handleSimpleAttributeWithExclusions<AttrType, IncompatibleAttrTypes...>(S, D, + Attr); +} + /// \brief Check if the passed-in expression is of type int or bool. static bool isIntOrBool(Expr *Exp) { QualType QT = Exp->getType(); @@ -3588,6 +3607,12 @@ static void handleOptimizeNoneAttr(Sema &S, Decl *D, } static void handleGlobalAttr(Sema &S, Decl *D, const AttributeList &Attr) { + if (checkAttrMutualExclusion<CUDADeviceAttr>(S, D, Attr.getRange(), + Attr.getName()) || + checkAttrMutualExclusion<CUDAHostAttr>(S, D, Attr.getRange(), + Attr.getName())) { + return; + } FunctionDecl *FD = cast<FunctionDecl>(D); if (!FD->getReturnType()->isVoidType()) { SourceRange RTRange = FD->getReturnTypeSourceRange(); @@ -4558,14 +4583,6 @@ static void handleInterruptAttr(Sema &S, Decl *D, const AttributeList &Attr) { handleARMInterruptAttr(S, D, Attr); } -static void handleMips16Attribute(Sema &S, Decl *D, const AttributeList &Attr) { - if (checkAttrMutualExclusion<MipsInterruptAttr>(S, D, Attr.getRange(), - Attr.getName())) - return; - - handleSimpleAttribute<Mips16Attr>(S, D, Attr); -} - static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D, const AttributeList &Attr) { uint32_t NumRegs; @@ -4955,7 +4972,8 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, handleDLLAttr(S, D, Attr); break; case AttributeList::AT_Mips16: - handleMips16Attribute(S, D, Attr); + handleSimpleAttributeWithExclusions<Mips16Attr, MipsInterruptAttr>(S, D, + Attr); break; case AttributeList::AT_NoMips16: handleSimpleAttribute<NoMips16Attr>(S, D, Attr); @@ -5006,7 +5024,8 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, handleCommonAttr(S, D, Attr); break; case AttributeList::AT_CUDAConstant: - handleSimpleAttribute<CUDAConstantAttr>(S, D, Attr); + handleSimpleAttributeWithExclusions<CUDAConstantAttr, CUDASharedAttr>(S, D, + Attr); break; case AttributeList::AT_PassObjectSize: handlePassObjectSizeAttr(S, D, Attr); @@ -5051,10 +5070,12 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, handleGlobalAttr(S, D, Attr); break; case AttributeList::AT_CUDADevice: - handleSimpleAttribute<CUDADeviceAttr>(S, D, Attr); + handleSimpleAttributeWithExclusions<CUDADeviceAttr, CUDAGlobalAttr>(S, D, + Attr); break; case 
AttributeList::AT_CUDAHost: - handleSimpleAttribute<CUDAHostAttr>(S, D, Attr); + handleSimpleAttributeWithExclusions<CUDAHostAttr, CUDAGlobalAttr>(S, D, + Attr); break; case AttributeList::AT_GNUInline: handleGNUInlineAttr(S, D, Attr); @@ -5114,7 +5135,8 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, handleSimpleAttribute<NoThrowAttr>(S, D, Attr); break; case AttributeList::AT_CUDAShared: - handleSimpleAttribute<CUDASharedAttr>(S, D, Attr); + handleSimpleAttributeWithExclusions<CUDASharedAttr, CUDAConstantAttr>(S, D, + Attr); break; case AttributeList::AT_VecReturn: handleVecReturnAttr(S, D, Attr); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp index 02091a7..11f2329 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp @@ -7000,6 +7000,7 @@ void Sema::CheckConversionDeclarator(Declarator &D, QualType &R, case DeclaratorChunk::BlockPointer: case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: extendLeft(Before, Chunk.getSourceRange()); break; @@ -7796,6 +7797,10 @@ bool Sema::CheckUsingShadowDecl(UsingDecl *Using, NamedDecl *Orig, if (UsingShadowDecl *Shadow = dyn_cast<UsingShadowDecl>(*I)) PrevShadow = Shadow; FoundEquivalentDecl = true; + } else if (isEquivalentInternalLinkageDeclaration(D, Target)) { + // We don't conflict with an existing using shadow decl of an equivalent + // declaration, but we're not a redeclaration of it. + FoundEquivalentDecl = true; } if (isVisible(D)) diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp index 5d0c605..3e89af6 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp @@ -3084,6 +3084,8 @@ ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) { Kind = CharacterLiteral::UTF16; else if (Literal.isUTF32()) Kind = CharacterLiteral::UTF32; + else if (Literal.isUTF8()) + Kind = CharacterLiteral::UTF8; Expr *Lit = new (Context) CharacterLiteral(Literal.getValue(), Kind, Ty, Tok.getLocation()); @@ -4313,10 +4315,16 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc, if (Result.isInvalid()) return ExprError(); - Expr *Arg = Result.getAs<Expr>(); - CheckCompletedExpr(Arg, Param->getOuterLocStart()); - // Build the default argument expression. - return CXXDefaultArgExpr::Create(Context, CallLoc, Param, Arg); + Result = ActOnFinishFullExpr(Result.getAs<Expr>(), + Param->getOuterLocStart()); + if (Result.isInvalid()) + return ExprError(); + + // Remember the instantiated default argument. 
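As the comment above says, the freshly instantiated default argument is cached back on the ParmVarDecl in the lines that follow, and the ASTMutationListener is notified; the UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT serialization added later in this diff records exactly that update. Hypothetical user code that reaches this path:

    template <typename T> void f(T x = T());  // default arg is only a pattern here
    void g() { f<int>(); }                    // instantiated at the call, then
                                              // cached on the parameter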
+ Param->setDefaultArg(Result.getAs<Expr>()); + if (ASTMutationListener *L = getASTMutationListener()) { + L->DefaultArgumentInstantiated(Param); + } } // If the default expression creates temporaries, we need to @@ -4929,7 +4937,9 @@ Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc, OverloadExpr *ovl = find.Expression; if (UnresolvedLookupExpr *ULE = dyn_cast<UnresolvedLookupExpr>(ovl)) return BuildOverloadedCallExpr(S, Fn, ULE, LParenLoc, ArgExprs, - RParenLoc, ExecConfig); + RParenLoc, ExecConfig, + /*AllowTypoCorrection=*/true, + find.IsAddressOfOperand); return BuildCallToMemberFunction(S, Fn, LParenLoc, ArgExprs, RParenLoc); } } @@ -4943,10 +4953,14 @@ Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc, Expr *NakedFn = Fn->IgnoreParens(); + bool CallingNDeclIndirectly = false; NamedDecl *NDecl = nullptr; - if (UnaryOperator *UnOp = dyn_cast<UnaryOperator>(NakedFn)) - if (UnOp->getOpcode() == UO_AddrOf) + if (UnaryOperator *UnOp = dyn_cast<UnaryOperator>(NakedFn)) { + if (UnOp->getOpcode() == UO_AddrOf) { + CallingNDeclIndirectly = true; NakedFn = UnOp->getSubExpr()->IgnoreParens(); + } + } if (isa<DeclRefExpr>(NakedFn)) { NDecl = cast<DeclRefExpr>(NakedFn)->getDecl(); @@ -4968,6 +4982,11 @@ Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc, NDecl = cast<MemberExpr>(NakedFn)->getMemberDecl(); if (FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(NDecl)) { + if (CallingNDeclIndirectly && + !checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, + Fn->getLocStart())) + return ExprError(); + if (FD->hasAttr<EnableIfAttr>()) { if (const EnableIfAttr *Attr = CheckEnableIf(FD, ArgExprs, true)) { Diag(Fn->getLocStart(), @@ -5583,6 +5602,39 @@ bool Sema::CheckVectorCast(SourceRange R, QualType VectorTy, QualType Ty, return false; } +ExprResult Sema::prepareVectorSplat(QualType VectorTy, Expr *SplattedExpr) { + QualType DestElemTy = VectorTy->castAs<VectorType>()->getElementType(); + + if (DestElemTy == SplattedExpr->getType()) + return SplattedExpr; + + assert(DestElemTy->isFloatingType() || + DestElemTy->isIntegralOrEnumerationType()); + + CastKind CK; + if (VectorTy->isExtVectorType() && SplattedExpr->getType()->isBooleanType()) { + // OpenCL requires that we convert `true` boolean expressions to -1, but + // only when splatting vectors. + if (DestElemTy->isFloatingType()) { + // To avoid having to have a CK_BooleanToSignedFloating cast kind, we cast + // in two steps: boolean to signed integral, then to floating. 
+ ExprResult CastExprRes = ImpCastExprToType(SplattedExpr, Context.IntTy, + CK_BooleanToSignedIntegral); + SplattedExpr = CastExprRes.get(); + CK = CK_IntegralToFloating; + } else { + CK = CK_BooleanToSignedIntegral; + } + } else { + ExprResult CastExprRes = SplattedExpr; + CK = PrepareScalarCast(CastExprRes, DestElemTy); + if (CastExprRes.isInvalid()) + return ExprError(); + SplattedExpr = CastExprRes.get(); + } + return ImpCastExprToType(SplattedExpr, DestElemTy, CK); +} + ExprResult Sema::CheckExtVectorCast(SourceRange R, QualType DestTy, Expr *CastExpr, CastKind &Kind) { assert(DestTy->isExtVectorType() && "Not an extended vector type!"); @@ -5613,15 +5665,8 @@ ExprResult Sema::CheckExtVectorCast(SourceRange R, QualType DestTy, diag::err_invalid_conversion_between_vector_and_scalar) << DestTy << SrcTy << R; - QualType DestElemTy = DestTy->getAs<ExtVectorType>()->getElementType(); - ExprResult CastExprRes = CastExpr; - CastKind CK = PrepareScalarCast(CastExprRes, DestElemTy); - if (CastExprRes.isInvalid()) - return ExprError(); - CastExpr = ImpCastExprToType(CastExprRes.get(), DestElemTy, CK).get(); - Kind = CK_VectorSplat; - return CastExpr; + return prepareVectorSplat(DestTy, CastExpr); } ExprResult @@ -6960,13 +7005,9 @@ Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS, if (RHSType->isExtVectorType()) return Incompatible; if (RHSType->isArithmeticType()) { - // CK_VectorSplat does T -> vector T, so first cast to the - // element type. - QualType elType = cast<ExtVectorType>(LHSType)->getElementType(); - if (elType != RHSType && ConvertRHS) { - Kind = PrepareScalarCast(RHS, elType); - RHS = ImpCastExprToType(RHS.get(), elType, Kind); - } + // CK_VectorSplat does T -> vector T, so first cast to the element type. + if (ConvertRHS) + RHS = prepareVectorSplat(LHSType, RHS.get()); Kind = CK_VectorSplat; return Compatible; } @@ -8184,7 +8225,7 @@ static QualType checkOpenCLVectorShift(Sema &S, if (RHS.isInvalid()) return QualType(); QualType LHSType = LHS.get()->getType(); - const VectorType *LHSVecTy = LHSType->getAs<VectorType>(); + const VectorType *LHSVecTy = LHSType->castAs<VectorType>(); QualType LHSEleType = LHSVecTy->getElementType(); // Note that RHS might not be a vector. @@ -13121,6 +13162,7 @@ bool Sema::tryCaptureVariable( case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: + case Type::Pipe: llvm_unreachable("type class is never variably-modified!"); case Type::Adjusted: QTy = cast<AdjustedType>(Ty)->getOriginalType(); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp index 2ad595f..38fbea1 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -3353,20 +3353,13 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, VK_RValue, /*BasePath=*/nullptr, CCK).get(); break; - case ICK_Vector_Splat: + case ICK_Vector_Splat: { // Vector splat from any arithmetic type to a vector. - // Cast to the element type. 
- { - QualType elType = ToType->getAs<ExtVectorType>()->getElementType(); - if (elType != From->getType()) { - ExprResult E = From; - From = ImpCastExprToType(From, elType, - PrepareScalarCast(E, elType)).get(); - } - From = ImpCastExprToType(From, ToType, CK_VectorSplat, - VK_RValue, /*BasePath=*/nullptr, CCK).get(); - } + Expr *Elem = prepareVectorSplat(ToType, From).get(); + From = ImpCastExprToType(Elem, ToType, CK_VectorSplat, VK_RValue, + /*BasePath=*/nullptr, CCK).get(); break; + } case ICK_Complex_Real: // Case 1. x -> _Complex y diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp index 57a08b9..1d86ca3 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp @@ -319,6 +319,7 @@ ExprResult Sema::BuildObjCNumericLiteral(SourceLocation AtLoc, Expr *Number) { // to use to determine the Objective-c literal kind. switch (Char->getKind()) { case CharacterLiteral::Ascii: + case CharacterLiteral::UTF8: NumberType = Context.CharTy; break; @@ -577,6 +578,7 @@ ExprResult Sema::BuildObjCBoxedExpr(SourceRange SR, Expr *ValueExpr) { // to use to determine the Objective-c literal kind. switch (Char->getKind()) { case CharacterLiteral::Ascii: + case CharacterLiteral::UTF8: ValueType = Context.CharTy; break; diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp index 481ae6c..45dc2e3 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp @@ -650,6 +650,13 @@ void LookupResult::print(raw_ostream &Out) { } } +LLVM_DUMP_METHOD void LookupResult::dump() { + llvm::errs() << "lookup results for " << getLookupName().getAsString() + << ":\n"; + for (NamedDecl *D : *this) + D->dump(); +} + /// \brief Lookup a builtin function, when name lookup would otherwise /// fail. 
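LookupResult::dump and Sema::ActOnPragmaDump above are debugging aids: an any-kind lookup with diagnostics suppressed, whose results are dumped to stderr. The pragma spelling is not shown in this diff; assuming it is along the lines of #pragma clang __debug dump, usage would look like:

    int foo;
    #pragma clang __debug dump foo   // prints "lookup results for foo:" and then
                                     // dumps the located VarDecl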
static bool LookupBuiltin(Sema &S, LookupResult &R) { @@ -2616,6 +2623,9 @@ addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType Ty) { case Type::Atomic: T = cast<AtomicType>(T)->getValueType().getTypePtr(); continue; + case Type::Pipe: + T = cast<PipeType>(T)->getElementType().getTypePtr(); + continue; } if (Queue.empty()) @@ -4988,3 +4998,12 @@ const Sema::TypoExprState &Sema::getTypoExprState(TypoExpr *TE) const { void Sema::clearDelayedTypo(TypoExpr *TE) { DelayedTypos.erase(TE); } + +void Sema::ActOnPragmaDump(Scope *S, SourceLocation IILoc, IdentifierInfo *II) { + DeclarationNameInfo Name(II, IILoc); + LookupResult R(*this, Name, LookupAnyName, Sema::NotForRedeclaration); + R.suppressDiagnostics(); + R.setHideTags(false); + LookupName(R, S); + R.dump(); +} diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp index e0c10e4..663da0c 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp @@ -258,6 +258,7 @@ static const Expr *IgnoreNarrowingConversion(const Expr *Converted) { case CK_IntegralCast: case CK_IntegralToBoolean: case CK_IntegralToFloating: + case CK_BooleanToSignedIntegral: case CK_FloatingToIntegral: case CK_FloatingToBoolean: case CK_FloatingCast: @@ -9643,6 +9644,13 @@ static void NoteFunctionCandidate(Sema &S, OverloadCandidate *Cand, case ovl_fail_enable_if: return DiagnoseFailedEnableIfAttr(S, Cand); + + case ovl_fail_addr_not_available: { + bool Available = checkAddressOfCandidateIsAvailable(S, Cand->Function); + (void)Available; + assert(!Available); + break; + } } } @@ -11245,6 +11253,17 @@ static ExprResult FinishOverloadedCallExpr(Sema &SemaRef, Scope *S, Expr *Fn, return ExprError(); } +static void markUnaddressableCandidatesUnviable(Sema &S, + OverloadCandidateSet &CS) { + for (auto I = CS.begin(), E = CS.end(); I != E; ++I) { + if (I->Viable && + !S.checkAddressOfFunctionIsAvailable(I->Function, /*Complain=*/false)) { + I->Viable = false; + I->FailureKind = ovl_fail_addr_not_available; + } + } +} + /// BuildOverloadedCallExpr - Given the call expression that calls Fn /// (which eventually refers to the declaration Func) and the call /// arguments Args/NumArgs, attempt to resolve the function call down @@ -11257,7 +11276,8 @@ ExprResult Sema::BuildOverloadedCallExpr(Scope *S, Expr *Fn, MultiExprArg Args, SourceLocation RParenLoc, Expr *ExecConfig, - bool AllowTypoCorrection) { + bool AllowTypoCorrection, + bool CalleesAddressIsTaken) { OverloadCandidateSet CandidateSet(Fn->getExprLoc(), OverloadCandidateSet::CSK_Normal); ExprResult result; @@ -11266,6 +11286,11 @@ ExprResult Sema::BuildOverloadedCallExpr(Scope *S, Expr *Fn, &result)) return result; + // If the user handed us something like `(&Foo)(Bar)`, we need to ensure that + // functions that aren't addressable are considered unviable.
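A hypothetical example of the (&Foo)(Bar) situation handled here, assuming pass_object_size (also added in this import) as the attribute that makes a candidate unaddressable:

    void copy(char *dst __attribute__((pass_object_size(0))), const char *src);
    void copy(char *dst, long n);
    void test(char *d) {
      (&copy)(d, 0);   // the pass_object_size overload cannot have its address
    }                  // taken, so it is marked unviable rather than ambiguous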
+ if (CalleesAddressIsTaken) + markUnaddressableCandidatesUnviable(*this, CandidateSet); + OverloadCandidateSet::iterator Best; OverloadingResult OverloadResult = CandidateSet.BestViableFunction(*this, Fn->getLocStart(), Best); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp index 6cc8588..5715607 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp @@ -4184,6 +4184,10 @@ bool UnnamedLocalNoLinkageFinder::VisitAtomicType(const AtomicType* T) { return Visit(T->getValueType()); } +bool UnnamedLocalNoLinkageFinder::VisitPipeType(const PipeType* T) { + return false; +} + bool UnnamedLocalNoLinkageFinder::VisitTagDecl(const TagDecl *Tag) { if (Tag->getDeclContext()->isFunctionOrMethod()) { S.Diag(SR.getBegin(), @@ -5503,6 +5507,8 @@ Sema::BuildExpressionFromIntegralTemplateArgument(const TemplateArgument &Arg, Expr *E; if (T->isAnyCharacterType()) { + // This does not need to handle u8 character literals because those are + // of type char, and so can also be covered by an ASCII character literal. CharacterLiteral::CharacterKind Kind; if (T->isWideCharType()) Kind = CharacterLiteral::Wide; diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp index cd54920..71faafc 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -1652,6 +1652,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S, case Type::Auto: case Type::DependentTemplateSpecialization: case Type::PackExpansion: + case Type::Pipe: // No template argument deduction for these types return Sema::TDK_Success; } @@ -4964,6 +4965,7 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T, case Type::ObjCObject: case Type::ObjCObjectPointer: case Type::UnresolvedUsing: + case Type::Pipe: #define TYPE(Class, Base) #define ABSTRACT_TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp index 61052f0..cb67d71 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -750,6 +750,7 @@ bool Sema::containsUnexpandedParameterPacks(Declarator &D) { case DeclaratorChunk::Pointer: case DeclaratorChunk::Reference: case DeclaratorChunk::Paren: + case DeclaratorChunk::Pipe: case DeclaratorChunk::BlockPointer: // These declarator chunks cannot contain any parameter packs. 
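The template-argument comment above leans on u8 character literals having type char. An illustrative C++1z snippet (hypothetical test input):

    template <char C> struct Tag {};
    Tag<u8'a'> t;   // the argument round-trips as a plain Ascii CharacterLiteral,
                    // since u8'a' is a char with value 0x61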
break; diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp index c70568c..f6ad132 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp @@ -335,6 +335,7 @@ static DeclaratorChunk *maybeMovePastReturnType(Declarator &declarator, case DeclaratorChunk::Array: case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: return result; // If we do find a function declarator, scan inwards from that, @@ -347,6 +348,7 @@ static DeclaratorChunk *maybeMovePastReturnType(Declarator &declarator, case DeclaratorChunk::Array: case DeclaratorChunk::Function: case DeclaratorChunk::Reference: + case DeclaratorChunk::Pipe: continue; case DeclaratorChunk::MemberPointer: @@ -427,6 +429,7 @@ static void distributeObjCPointerTypeAttr(TypeProcessingState &state, // Don't walk through these. case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: goto error; } } @@ -459,6 +462,7 @@ distributeObjCPointerTypeAttrFromDeclarator(TypeProcessingState &state, case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Paren: case DeclaratorChunk::Array: + case DeclaratorChunk::Pipe: continue; case DeclaratorChunk::Function: @@ -520,6 +524,7 @@ static void distributeFunctionTypeAttr(TypeProcessingState &state, case DeclaratorChunk::Array: case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: continue; } } @@ -1272,6 +1277,10 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { // value being declared, poison it as invalid so we don't get chains of // errors. declarator.setInvalidType(true); + } else if (S.getLangOpts().OpenCLVersion >= 200 && DS.isTypeSpecPipe()) { + S.Diag(DeclLoc, diag::err_missing_actual_pipe_type) + << DS.getSourceRange(); + declarator.setInvalidType(true); } else { S.Diag(DeclLoc, diag::ext_missing_type_specifier) << DS.getSourceRange(); @@ -1564,7 +1573,9 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { // Apply any type attributes from the decl spec. This may cause the // list of type attributes to be temporarily saved while the type // attributes are pushed around. - processTypeAttrs(state, Result, TAL_DeclSpec, DS.getAttributes().getList()); + // Pipe attributes will be handled later (in GetFullTypeForDeclarator). + if (!DS.isTypeSpecPipe()) + processTypeAttrs(state, Result, TAL_DeclSpec, DS.getAttributes().getList()); // Apply const/volatile/restrict qualifiers to T. if (unsigned TypeQuals = DS.getTypeQualifiers()) { @@ -1924,6 +1935,21 @@ QualType Sema::BuildReferenceType(QualType T, bool SpelledAsLValue, return Context.getRValueReferenceType(T); } +/// \brief Build a Pipe type. +/// +/// \param T The element type of the pipe being built. +/// +/// \param Loc Currently unused. +/// +/// \returns A suitable pipe type, if there are no errors. Otherwise, returns a +/// NULL type. +QualType Sema::BuildPipeType(QualType T, SourceLocation Loc) { + assert(!T->isObjCObjectType() && "Should build ObjCObjectPointerType"); + + // Build the pipe type. + return Context.getPipeType(T); +} + /// Check whether the specified array size makes the array type a VLA. If so, /// return true, if not, return the size of the array in SizeVal.
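Sema::BuildPipeType above is the one entry point for forming OpenCL 2.0 pipe types: a declaration like pipe int p; reaches it with T being int, while a bare pipe p; is rejected earlier through err_missing_actual_pipe_type. A compiler-side sketch (hypothetical caller; S is a Sema, KWLoc the 'pipe' keyword location):

    QualType Elem = S.Context.IntTy;
    QualType Pipe = S.BuildPipeType(Elem, KWLoc);   // yields 'pipe int'
    assert(!Pipe.isNull() && Pipe->isPipeType());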
static bool isArraySizeVLA(Sema &S, Expr *ArraySize, llvm::APSInt &SizeVal) { @@ -2393,6 +2419,7 @@ static void inferARCWriteback(TypeProcessingState &state, case DeclaratorChunk::Array: // suppress if written (id[])? case DeclaratorChunk::Function: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: return; } } @@ -2532,6 +2559,7 @@ static void diagnoseRedundantReturnTypeQualifiers(Sema &S, QualType RetTy, case DeclaratorChunk::Reference: case DeclaratorChunk::Array: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: // FIXME: We can't currently provide an accurate source location and a // fix-it hint for these. unsigned AtomicQual = RetTy->isAtomicType() ? DeclSpec::TQ_atomic : 0; @@ -3057,6 +3085,7 @@ static PointerDeclaratorKind classifyPointerDeclarator(Sema &S, switch (chunk.Kind) { case DeclaratorChunk::Array: case DeclaratorChunk::Function: + case DeclaratorChunk::Pipe: break; case DeclaratorChunk::BlockPointer: @@ -3305,6 +3334,8 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, case DeclaratorChunk::Array: DiagKind = 2; break; + case DeclaratorChunk::Pipe: + break; } S.Diag(DeclChunk.Loc, DiagId) << DiagKind; @@ -3370,6 +3401,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, switch (chunk.Kind) { case DeclaratorChunk::Array: case DeclaratorChunk::Function: + case DeclaratorChunk::Pipe: break; case DeclaratorChunk::BlockPointer: @@ -3689,6 +3721,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, break; case DeclaratorChunk::Function: case DeclaratorChunk::BlockPointer: + case DeclaratorChunk::Pipe: // These are invalid anyway, so just ignore. break; } @@ -4038,7 +4071,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, break; } - case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::MemberPointer: { // The scope spec must refer to a class, or be dependent. CXXScopeSpec &SS = DeclType.Mem.Scope(); QualType ClsType; @@ -4098,6 +4131,12 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, break; } + case DeclaratorChunk::Pipe: { + T = S.BuildPipeType(T, DeclType.Loc ); + break; + } + } + if (T.isNull()) { D.setInvalidType(true); T = Context.IntTy; @@ -4392,6 +4431,7 @@ static void transferARCOwnership(TypeProcessingState &state, case DeclaratorChunk::Function: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: return; } } @@ -4682,6 +4722,14 @@ namespace { } } + void VisitPipeTypeLoc(PipeTypeLoc TL) { + TL.setKWLoc(DS.getTypeSpecTypeLoc()); + + TypeSourceInfo *TInfo = 0; + Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo); + TL.getValueLoc().initializeFullCopy(TInfo->getTypeLoc()); + } + void VisitTypeLoc(TypeLoc TL) { // FIXME: add other typespec types and change this to an assert. 
TL.initialize(Context, DS.getTypeSpecTypeLoc()); @@ -4802,6 +4850,10 @@ namespace { TL.setLParenLoc(Chunk.Loc); TL.setRParenLoc(Chunk.EndLoc); } + void VisitPipeTypeLoc(PipeTypeLoc TL) { + assert(Chunk.Kind == DeclaratorChunk::Pipe); + TL.setKWLoc(Chunk.Loc); + } void VisitTypeLoc(TypeLoc TL) { llvm_unreachable("unsupported TypeLoc kind in declarator!"); @@ -4815,6 +4867,7 @@ static void fillAtomicQualLoc(AtomicTypeLoc ATL, const DeclaratorChunk &Chunk) { case DeclaratorChunk::Function: case DeclaratorChunk::Array: case DeclaratorChunk::Paren: + case DeclaratorChunk::Pipe: llvm_unreachable("cannot be _Atomic qualified"); case DeclaratorChunk::Pointer: @@ -5738,6 +5791,7 @@ static bool distributeNullabilityTypeAttr(TypeProcessingState &state, // Don't walk through these. case DeclaratorChunk::Reference: + case DeclaratorChunk::Pipe: return false; } } diff --git a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h index e0a9653..935304f 100644 --- a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h +++ b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h @@ -1046,6 +1046,9 @@ public: /// Subclasses may override this routine to provide different behavior. QualType RebuildAtomicType(QualType ValueType, SourceLocation KWLoc); + /// \brief Build a new pipe type given its value type. + QualType RebuildPipeType(QualType ValueType, SourceLocation KWLoc); + /// \brief Build a new template name given a nested name specifier, a flag /// indicating whether the "template" keyword was provided, and the template /// that the template name refers to. @@ -3580,7 +3583,7 @@ void TreeTransform<Derived>::InventTemplateArgumentLoc( case TemplateArgument::Template: case TemplateArgument::TemplateExpansion: { NestedNameSpecifierLocBuilder Builder; - TemplateName Template = Arg.getAsTemplate(); + TemplateName Template = Arg.getAsTemplateOrTemplatePattern(); if (DependentTemplateName *DTN = Template.getAsDependentTemplateName()) Builder.MakeTrivial(SemaRef.Context, DTN->getQualifier(), Loc); else if (QualifiedTemplateName *QTN = Template.getAsQualifiedTemplateName()) @@ -5324,6 +5327,26 @@ QualType TreeTransform<Derived>::TransformAtomicType(TypeLocBuilder &TLB, return Result; } +template <typename Derived> +QualType TreeTransform<Derived>::TransformPipeType(TypeLocBuilder &TLB, + PipeTypeLoc TL) { + QualType ValueType = getDerived().TransformType(TLB, TL.getValueLoc()); + if (ValueType.isNull()) + return QualType(); + + QualType Result = TL.getType(); + if (getDerived().AlwaysRebuild() || ValueType != TL.getValueLoc().getType()) { + Result = getDerived().RebuildPipeType(ValueType, TL.getKWLoc()); + if (Result.isNull()) + return QualType(); + } + + PipeTypeLoc NewTL = TLB.push<PipeTypeLoc>(Result); + NewTL.setKWLoc(TL.getKWLoc()); + + return Result; +} + /// \brief Simple iterator that traverses the template arguments in a /// container that provides a \c getArgLoc() member function. 
/// @@ -6128,7 +6151,7 @@ TreeTransform<Derived>::TransformIfStmt(IfStmt *S) { } } - Sema::FullExprArg FullCond(getSema().MakeFullExpr(Cond.get())); + Sema::FullExprArg FullCond(getSema().MakeFullExpr(Cond.get(), S->getIfLoc())); if (!S->getConditionVariable() && S->getCond() && !FullCond.get()) return StmtError(); @@ -6223,7 +6246,8 @@ TreeTransform<Derived>::TransformWhileStmt(WhileStmt *S) { } } - Sema::FullExprArg FullCond(getSema().MakeFullExpr(Cond.get())); + Sema::FullExprArg FullCond( + getSema().MakeFullExpr(Cond.get(), S->getWhileLoc())); if (!S->getConditionVariable() && S->getCond() && !FullCond.get()) return StmtError(); @@ -6307,7 +6331,8 @@ TreeTransform<Derived>::TransformForStmt(ForStmt *S) { } } - Sema::FullExprArg FullCond(getSema().MakeFullExpr(Cond.get())); + Sema::FullExprArg FullCond( + getSema().MakeFullExpr(Cond.get(), S->getForLoc())); if (!S->getConditionVariable() && S->getCond() && !FullCond.get()) return StmtError(); @@ -11348,6 +11373,12 @@ QualType TreeTransform<Derived>::RebuildAtomicType(QualType ValueType, } template<typename Derived> +QualType TreeTransform<Derived>::RebuildPipeType(QualType ValueType, + SourceLocation KWLoc) { + return SemaRef.BuildPipeType(ValueType, KWLoc); +} + +template<typename Derived> TemplateName TreeTransform<Derived>::RebuildTemplateName(CXXScopeSpec &SS, bool TemplateKW, diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.h b/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.h index e59bc89..64f583c 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.h +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.h @@ -29,6 +29,7 @@ enum DeclUpdateKind { UPD_CXX_ADDED_FUNCTION_DEFINITION, UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER, UPD_CXX_INSTANTIATED_CLASS_DEFINITION, + UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT, UPD_CXX_RESOLVED_DTOR_DELETE, UPD_CXX_RESOLVED_EXCEPTION_SPEC, UPD_CXX_DEDUCED_RETURN_TYPE, diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp index a279475..833ff57 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp @@ -5640,6 +5640,17 @@ QualType ASTReader::readTypeRecord(unsigned Index) { QualType ValueType = readType(*Loc.F, Record, Idx); return Context.getAtomicType(ValueType); } + + case TYPE_PIPE: { + if (Record.size() != 1) { + Error("Incorrect encoding of pipe type"); + return QualType(); + } + + // Reading the pipe element type. 
+ QualType ElementType = readType(*Loc.F, Record, Idx); + return Context.getPipeType(ElementType); + } } llvm_unreachable("Invalid TypeCode!"); } @@ -5911,6 +5922,9 @@ void TypeLocReader::VisitAtomicTypeLoc(AtomicTypeLoc TL) { TL.setLParenLoc(ReadSourceLocation(Record, Idx)); TL.setRParenLoc(ReadSourceLocation(Record, Idx)); } +void TypeLocReader::VisitPipeTypeLoc(PipeTypeLoc TL) { + TL.setKWLoc(ReadSourceLocation(Record, Idx)); +} TypeSourceInfo *ASTReader::GetTypeSourceInfo(ModuleFile &F, const RecordData &Record, diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp index 8fb110e..5bf95f8 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp @@ -3626,6 +3626,21 @@ void ASTDeclReader::UpdateDecl(Decl *D, ModuleFile &ModuleFile, Reader.ReadSourceLocation(ModuleFile, Record, Idx)); break; + case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT: { + auto Param = cast<ParmVarDecl>(D); + + // We have to read the default argument regardless of whether we use it + // so that hypothetical further update records aren't messed up. + // TODO: Add a function to skip over the next expr record. + auto DefaultArg = Reader.ReadExpr(F); + + // Only apply the update if the parameter still has an uninstantiated + // default argument. + if (Param->hasUninstantiatedDefaultArg()) + Param->setDefaultArg(DefaultArg); + break; + } + case UPD_CXX_ADDED_FUNCTION_DEFINITION: { FunctionDecl *FD = cast<FunctionDecl>(D); if (Reader.PendingBodies[FD]) { diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp index bc678af..ad81ac8 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp @@ -1364,10 +1364,7 @@ void ASTStmtReader::VisitCXXThrowExpr(CXXThrowExpr *E) { void ASTStmtReader::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { VisitExpr(E); - - assert((bool)Record[Idx] == E->Param.getInt() && "We messed up at creation ?"); - ++Idx; // HasOtherExprStored and SubExpr was handled during creation. 
- E->Param.setPointer(ReadDeclAs<ParmVarDecl>(Record, Idx)); + E->Param = ReadDeclAs<ParmVarDecl>(Record, Idx); E->Loc = ReadSourceLocation(Record, Idx); } @@ -3205,16 +3202,9 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { case EXPR_CXX_THROW: S = new (Context) CXXThrowExpr(Empty); break; - case EXPR_CXX_DEFAULT_ARG: { - bool HasOtherExprStored = Record[ASTStmtReader::NumExprFields]; - if (HasOtherExprStored) { - Expr *SubExpr = ReadSubExpr(); - S = CXXDefaultArgExpr::Create(Context, SourceLocation(), nullptr, - SubExpr); - } else - S = new (Context) CXXDefaultArgExpr(Empty); + case EXPR_CXX_DEFAULT_ARG: + S = new (Context) CXXDefaultArgExpr(Empty); break; - } case EXPR_CXX_DEFAULT_INIT: S = new (Context) CXXDefaultInitExpr(Empty); break; diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp index 0f50d7a..ec04cd6 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp @@ -446,6 +446,12 @@ ASTTypeWriter::VisitAtomicType(const AtomicType *T) { Code = TYPE_ATOMIC; } +void +ASTTypeWriter::VisitPipeType(const PipeType *T) { + Writer.AddTypeRef(T->getElementType(), Record); + Code = TYPE_PIPE; +} + namespace { class TypeLocWriter : public TypeLocVisitor<TypeLocWriter> { @@ -672,6 +678,9 @@ void TypeLocWriter::VisitAtomicTypeLoc(AtomicTypeLoc TL) { Writer.AddSourceLocation(TL.getLParenLoc(), Record); Writer.AddSourceLocation(TL.getRParenLoc(), Record); } +void TypeLocWriter::VisitPipeTypeLoc(PipeTypeLoc TL) { + Writer.AddSourceLocation(TL.getKWLoc(), Record); +} void ASTWriter::WriteTypeAbbrevs() { using namespace llvm; @@ -4611,6 +4620,11 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { AddSourceLocation(Update.getLoc(), Record); break; + case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT: + AddStmt(const_cast<Expr*>( + cast<ParmVarDecl>(Update.getDecl())->getDefaultArg())); + break; + case UPD_CXX_INSTANTIATED_CLASS_DEFINITION: { auto *RD = cast<CXXRecordDecl>(D); UpdatedDeclContexts.insert(RD->getPrimaryContext()); @@ -5779,6 +5793,15 @@ void ASTWriter::StaticDataMemberInstantiated(const VarDecl *D) { D->getMemberSpecializationInfo()->getPointOfInstantiation())); } +void ASTWriter::DefaultArgumentInstantiated(const ParmVarDecl *D) { + assert(!WritingAST && "Already writing the AST!"); + if (!D->isFromASTFile()) + return; + + DeclUpdates[D].push_back( + DeclUpdate(UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT, D)); +} + void ASTWriter::AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD, const ObjCInterfaceDecl *IFD) { assert(!WritingAST && "Already writing the AST!"); diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp index 20ca6d6..54bba28 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp @@ -2033,7 +2033,7 @@ void ASTWriter::WriteDeclAbbrevs() { //Character Literal Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getValue Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Location - Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // getKind + Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // getKind CharacterLiteralAbbrev = Stream.EmitAbbrev(Abv); // Abbreviation for EXPR_IMPLICIT_CAST diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp 
index e52ed05..000a218 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp @@ -1336,15 +1336,8 @@ void ASTStmtWriter::VisitCXXThrowExpr(CXXThrowExpr *E) { void ASTStmtWriter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { VisitExpr(E); - - bool HasOtherExprStored = E->Param.getInt(); - // Store these first, the reader reads them before creation. - Record.push_back(HasOtherExprStored); - if (HasOtherExprStored) - Writer.AddStmt(E->getExpr()); Writer.AddDeclRef(E->getParam(), Record); Writer.AddSourceLocation(E->getUsedLocation(), Record); - Code = serialization::EXPR_CXX_DEFAULT_ARG; } diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 5d78d9b..1753744 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -948,15 +948,15 @@ bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR); switch (MR->getKind()) { - case MemRegion::FunctionTextRegionKind: { - const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl(); + case MemRegion::FunctionCodeRegionKind: { + const NamedDecl *FD = cast<FunctionCodeRegion>(MR)->getDecl(); if (FD) os << "the address of the function '" << *FD << '\''; else os << "the address of a function"; return true; } - case MemRegion::BlockTextRegionKind: + case MemRegion::BlockCodeRegionKind: os << "block text"; return true; case MemRegion::BlockDataRegionKind: diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index ce2c194..fee030f 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -1513,15 +1513,15 @@ bool MallocChecker::SummarizeValue(raw_ostream &os, SVal V) { bool MallocChecker::SummarizeRegion(raw_ostream &os, const MemRegion *MR) { switch (MR->getKind()) { - case MemRegion::FunctionTextRegionKind: { - const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl(); + case MemRegion::FunctionCodeRegionKind: { + const NamedDecl *FD = cast<FunctionCodeRegion>(MR)->getDecl(); if (FD) os << "the address of the function '" << *FD << '\''; else os << "the address of a function"; return true; } - case MemRegion::BlockTextRegionKind: + case MemRegion::BlockCodeRegionKind: os << "block text"; return true; case MemRegion::BlockDataRegionKind: diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index a5b5871..175225b 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -316,7 +316,7 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, case CK_ArrayToPointerDecay: case CK_BitCast: case CK_AddressSpaceConversion: - case CK_IntegralCast: + case CK_BooleanToSignedIntegral: case CK_NullToPointer: case CK_IntegralToPointer: case CK_PointerToIntegral: @@ -345,6 +345,17 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, // Delegate to SValBuilder to process. 
SVal V = state->getSVal(Ex, LCtx); V = svalBuilder.evalCast(V, T, ExTy); + // Negate the result if we're treating the boolean as a signed i1 + if (CastE->getCastKind() == CK_BooleanToSignedIntegral) + V = evalMinus(V); + state = state->BindExpr(CastE, LCtx, V); + Bldr.generateNode(CastE, Pred, state); + continue; + } + case CK_IntegralCast: { + // Delegate to SValBuilder to process. + SVal V = state->getSVal(Ex, LCtx); + V = svalBuilder.evalIntegralCast(state, V, T, ExTy); state = state->BindExpr(CastE, LCtx, V); Bldr.generateNode(CastE, Pred, state); continue; diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/MemRegion.cpp index ad3f396..30052cc 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -245,7 +245,7 @@ QualType CXXBaseObjectRegion::getValueType() const { // FoldingSet profiling. //===----------------------------------------------------------------------===// -void MemSpaceRegion::Profile(llvm::FoldingSetNodeID& ID) const { +void MemSpaceRegion::Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger((unsigned)getKind()); } @@ -357,31 +357,31 @@ void ElementRegion::Profile(llvm::FoldingSetNodeID& ID) const { ElementRegion::ProfileRegion(ID, ElementType, Index, superRegion); } -void FunctionTextRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, +void FunctionCodeRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, const NamedDecl *FD, const MemRegion*) { - ID.AddInteger(MemRegion::FunctionTextRegionKind); + ID.AddInteger(MemRegion::FunctionCodeRegionKind); ID.AddPointer(FD); } -void FunctionTextRegion::Profile(llvm::FoldingSetNodeID& ID) const { - FunctionTextRegion::ProfileRegion(ID, FD, superRegion); +void FunctionCodeRegion::Profile(llvm::FoldingSetNodeID& ID) const { + FunctionCodeRegion::ProfileRegion(ID, FD, superRegion); } -void BlockTextRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, +void BlockCodeRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, const BlockDecl *BD, CanQualType, const AnalysisDeclContext *AC, const MemRegion*) { - ID.AddInteger(MemRegion::BlockTextRegionKind); + ID.AddInteger(MemRegion::BlockCodeRegionKind); ID.AddPointer(BD); } -void BlockTextRegion::Profile(llvm::FoldingSetNodeID& ID) const { - BlockTextRegion::ProfileRegion(ID, BD, locTy, AC, superRegion); +void BlockCodeRegion::Profile(llvm::FoldingSetNodeID& ID) const { + BlockCodeRegion::ProfileRegion(ID, BD, locTy, AC, superRegion); } void BlockDataRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, - const BlockTextRegion *BC, + const BlockCodeRegion *BC, const LocationContext *LC, unsigned BlkCount, const MemRegion *sReg) { @@ -457,11 +457,11 @@ void AllocaRegion::dumpToStream(raw_ostream &os) const { os << "alloca{" << (const void*) Ex << ',' << Cnt << '}'; } -void FunctionTextRegion::dumpToStream(raw_ostream &os) const { +void FunctionCodeRegion::dumpToStream(raw_ostream &os) const { os << "code{" << getDecl()->getDeclName().getAsString() << '}'; } -void BlockTextRegion::dumpToStream(raw_ostream &os) const { +void BlockCodeRegion::dumpToStream(raw_ostream &os) const { os << "block_code{" << (const void*) this << '}'; } @@ -533,6 +533,10 @@ void RegionRawOffset::dumpToStream(raw_ostream &os) const { os << "raw_offset{" << getRegion() << ',' << getOffset().getQuantity() << '}'; } +void CodeSpaceRegion::dumpToStream(raw_ostream &os) const { + os << "CodeSpaceRegion"; +} + void StaticGlobalSpaceRegion::dumpToStream(raw_ostream 
&os) const { os << "StaticGlobalsMemSpace{" << CR << '}'; } @@ -711,11 +715,11 @@ const HeapSpaceRegion *MemRegionManager::getHeapRegion() { return LazyAllocate(heap); } -const MemSpaceRegion *MemRegionManager::getUnknownRegion() { +const UnknownSpaceRegion *MemRegionManager::getUnknownRegion() { return LazyAllocate(unknown); } -const MemSpaceRegion *MemRegionManager::getCodeRegion() { +const CodeSpaceRegion *MemRegionManager::getCodeRegion() { return LazyAllocate(code); } @@ -815,11 +819,11 @@ const VarRegion* MemRegionManager::getVarRegion(const VarDecl *D, const Decl *STCD = STC->getDecl(); if (isa<FunctionDecl>(STCD) || isa<ObjCMethodDecl>(STCD)) sReg = getGlobalsRegion(MemRegion::StaticGlobalSpaceRegionKind, - getFunctionTextRegion(cast<NamedDecl>(STCD))); + getFunctionCodeRegion(cast<NamedDecl>(STCD))); else if (const BlockDecl *BD = dyn_cast<BlockDecl>(STCD)) { // FIXME: The fallback type here is totally bogus -- though it should // never be queried, it will prevent uniquing with the real - // BlockTextRegion. Ideally we'd fix the AST so that we always had a + // BlockCodeRegion. Ideally we'd fix the AST so that we always had a // signature. QualType T; if (const TypeSourceInfo *TSI = BD->getSignatureAsWritten()) @@ -830,8 +834,8 @@ const VarRegion* MemRegionManager::getVarRegion(const VarDecl *D, T = getContext().getFunctionNoProtoType(T); T = getContext().getBlockPointerType(T); - const BlockTextRegion *BTR = - getBlockTextRegion(BD, C.getCanonicalType(T), + const BlockCodeRegion *BTR = + getBlockCodeRegion(BD, C.getCanonicalType(T), STC->getAnalysisDeclContext()); sReg = getGlobalsRegion(MemRegion::StaticGlobalSpaceRegionKind, BTR); @@ -852,7 +856,7 @@ const VarRegion *MemRegionManager::getVarRegion(const VarDecl *D, } const BlockDataRegion * -MemRegionManager::getBlockDataRegion(const BlockTextRegion *BC, +MemRegionManager::getBlockDataRegion(const BlockCodeRegion *BC, const LocationContext *LC, unsigned blockCount) { const MemRegion *sReg = nullptr; @@ -925,15 +929,15 @@ MemRegionManager::getElementRegion(QualType elementType, NonLoc Idx, return R; } -const FunctionTextRegion * -MemRegionManager::getFunctionTextRegion(const NamedDecl *FD) { - return getSubRegion<FunctionTextRegion>(FD, getCodeRegion()); +const FunctionCodeRegion * +MemRegionManager::getFunctionCodeRegion(const NamedDecl *FD) { + return getSubRegion<FunctionCodeRegion>(FD, getCodeRegion()); } -const BlockTextRegion * -MemRegionManager::getBlockTextRegion(const BlockDecl *BD, CanQualType locTy, +const BlockCodeRegion * +MemRegionManager::getBlockCodeRegion(const BlockDecl *BD, CanQualType locTy, AnalysisDeclContext *AC) { - return getSubRegion<BlockTextRegion>(BD, locTy, AC, getCodeRegion()); + return getSubRegion<BlockCodeRegion>(BD, locTy, AC, getCodeRegion()); } @@ -1196,7 +1200,7 @@ RegionOffset MemRegion::getAsOffset() const { while (1) { switch (R->getKind()) { - case GenericMemSpaceRegionKind: + case CodeSpaceRegionKind: case StackLocalsSpaceRegionKind: case StackArgumentsSpaceRegionKind: case HeapSpaceRegionKind: @@ -1209,8 +1213,8 @@ RegionOffset MemRegion::getAsOffset() const { assert(Offset == 0 && !SymbolicOffsetBase); goto Finish; - case FunctionTextRegionKind: - case BlockTextRegionKind: + case FunctionCodeRegionKind: + case BlockCodeRegionKind: case BlockDataRegionKind: // These will never have bindings, but may end up having values requested // if the user does some strange casting. 
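The "strange casting" mentioned above is user code along these lines (hypothetical, and not something with defined behavior when executed):

    void f(void);
    int peek(void) {
      // Requests a value inside f's FunctionCodeRegion, which has no bindings;
      // the region still has to answer offset queries gracefully.
      return *(const int *)&f;
    }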
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp index 4f9ad9e..100fa75 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -536,19 +536,19 @@ bool ScanReachableSymbols::scan(const SymExpr *sym) { // TODO: should be rewritten using SymExpr::symbol_iterator. switch (sym->getKind()) { - case SymExpr::RegionValueKind: - case SymExpr::ConjuredKind: - case SymExpr::DerivedKind: - case SymExpr::ExtentKind: - case SymExpr::MetadataKind: + case SymExpr::SymbolRegionValueKind: + case SymExpr::SymbolConjuredKind: + case SymExpr::SymbolDerivedKind: + case SymExpr::SymbolExtentKind: + case SymExpr::SymbolMetadataKind: break; - case SymExpr::CastSymbolKind: + case SymExpr::SymbolCastKind: return scan(cast<SymbolCast>(sym)->getOperand()); - case SymExpr::SymIntKind: + case SymExpr::SymIntExprKind: return scan(cast<SymIntExpr>(sym)->getLHS()); - case SymExpr::IntSymKind: + case SymExpr::IntSymExprKind: return scan(cast<IntSymExpr>(sym)->getRHS()); - case SymExpr::SymSymKind: { + case SymExpr::SymSymExprKind: { const SymSymExpr *x = cast<SymSymExpr>(sym); return scan(x->getLHS()) && scan(x->getRHS()); } diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 0a2b2e6..77b0ad3 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -171,7 +171,7 @@ private: case APSIntType::RTR_Below: // The entire range is outside the symbol's set of possible values. // If this is a conventionally-ordered range, the state is infeasible. - if (Lower < Upper) + if (Lower <= Upper) return false; // However, if the range wraps around, it spans all possible values. @@ -222,7 +222,7 @@ private: case APSIntType::RTR_Above: // The entire range is outside the symbol's set of possible values. // If this is a conventionally-ordered range, the state is infeasible. - if (Lower < Upper) + if (Lower <= Upper) return false; // However, if the range wraps around, it spans all possible values. diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp index cdae040..1831522 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -214,15 +214,15 @@ SValBuilder::getDerivedRegionValueSymbolVal(SymbolRef parentSymbol, } DefinedSVal SValBuilder::getFunctionPointer(const FunctionDecl *func) { - return loc::MemRegionVal(MemMgr.getFunctionTextRegion(func)); + return loc::MemRegionVal(MemMgr.getFunctionCodeRegion(func)); } DefinedSVal SValBuilder::getBlockPointer(const BlockDecl *block, CanQualType locTy, const LocationContext *locContext, unsigned blockCount) { - const BlockTextRegion *BC = - MemMgr.getBlockTextRegion(block, locTy, locContext->getAnalysisDeclContext()); + const BlockCodeRegion *BC = + MemMgr.getBlockCodeRegion(block, locTy, locContext->getAnalysisDeclContext()); const BlockDataRegion *BD = MemMgr.getBlockDataRegion(BC, locContext, blockCount); return loc::MemRegionVal(BD); @@ -423,6 +423,45 @@ static bool shouldBeModeledWithNoOp(ASTContext &Context, QualType ToTy, return true; } +// Handles casts of type CK_IntegralCast. 
+// At the moment, this function will redirect to evalCast, except when the range +// of the original value is known to be greater than the max of the target type. +SVal SValBuilder::evalIntegralCast(ProgramStateRef state, SVal val, + QualType castTy, QualType originalTy) { + + // No truncations if target type is big enough. + if (getContext().getTypeSize(castTy) >= getContext().getTypeSize(originalTy)) + return evalCast(val, castTy, originalTy); + + const SymExpr *se = val.getAsSymbolicExpression(); + if (!se) // Let evalCast handle non-symbolic expressions. + return evalCast(val, castTy, originalTy); + + // Find the maximum value of the target type. + APSIntType ToType(getContext().getTypeSize(castTy), + castTy->isUnsignedIntegerType()); + llvm::APSInt ToTypeMax = ToType.getMaxValue(); + NonLoc ToTypeMaxVal = + makeIntVal(ToTypeMax.isUnsigned() ? ToTypeMax.getZExtValue() + : ToTypeMax.getSExtValue(), + castTy) + .castAs<NonLoc>(); + // Check the range of the symbol being cast against the maximum value of the + // target type. + NonLoc FromVal = val.castAs<NonLoc>(); + QualType CmpTy = getConditionType(); + NonLoc CompVal = + evalBinOpNN(state, BO_LT, FromVal, ToTypeMaxVal, CmpTy).castAs<NonLoc>(); + ProgramStateRef IsNotTruncated, IsTruncated; + std::tie(IsNotTruncated, IsTruncated) = state->assume(CompVal); + if (!IsNotTruncated && IsTruncated) { + // Symbol is truncated so we evaluate it as a cast. + NonLoc CastVal = makeNonLoc(se, originalTy, castTy); + return CastVal; + } + return evalCast(val, castTy, originalTy); +} + // FIXME: should rewrite according to the cast kind. SVal SValBuilder::evalCast(SVal val, QualType castTy, QualType originalTy) { castTy = Context.getCanonicalType(castTy); diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SVals.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SVals.cpp index 8de939f..dffee6c 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SVals.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SVals.cpp @@ -51,7 +51,7 @@ bool SVal::hasConjuredSymbol() const { const FunctionDecl *SVal::getAsFunctionDecl() const { if (Optional<loc::MemRegionVal> X = getAs<loc::MemRegionVal>()) { const MemRegion* R = X->getRegion(); - if (const FunctionTextRegion *CTR = R->getAs<FunctionTextRegion>()) + if (const FunctionCodeRegion *CTR = R->getAs<FunctionCodeRegion>()) if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(CTR->getDecl())) return FD; } @@ -240,7 +240,7 @@ void SVal::dump() const { dumpToStream(llvm::errs()); } void SVal::dumpToStream(raw_ostream &os) const { switch (getBaseKind()) { - case UnknownKind: + case UnknownValKind: os << "Unknown"; break; case NonLocKind: @@ -249,7 +249,7 @@ void SVal::dumpToStream(raw_ostream &os) const { case LocKind: castAs<Loc>().dumpToStream(os); break; - case UndefinedKind: + case UndefinedValKind: os << "Undefined"; break; } @@ -313,7 +313,7 @@ void Loc::dumpToStream(raw_ostream &os) const { case loc::GotoLabelKind: os << "&&" << castAs<loc::GotoLabel>().getLabel()->getName(); break; - case loc::MemRegionKind: + case loc::MemRegionValKind: os << '&' << castAs<loc::MemRegionVal>().getRegion()->getString(); break; default: diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index a704ce2..72b852b 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -141,9 +141,9 @@ SVal
SimpleSValBuilder::evalCastFromLoc(Loc val, QualType castTy) { // unless this is a weak function or a symbolic region. if (castTy->isBooleanType()) { switch (val.getSubKind()) { - case loc::MemRegionKind: { + case loc::MemRegionValKind: { const MemRegion *R = val.castAs<loc::MemRegionVal>().getRegion(); - if (const FunctionTextRegion *FTR = dyn_cast<FunctionTextRegion>(R)) + if (const FunctionCodeRegion *FTR = dyn_cast<FunctionCodeRegion>(R)) if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(FTR->getDecl())) if (FD->isWeak()) // FIXME: Currently we are using an extent symbol here, @@ -689,7 +689,7 @@ SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, // completely unknowable. return UnknownVal(); } - case loc::MemRegionKind: { + case loc::MemRegionValKind: { if (Optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) { // If one of the operands is a symbol and the other is a constant, // build an expression for use by the constraint manager. @@ -718,7 +718,7 @@ SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, // Get both values as regions, if possible. const MemRegion *LeftMR = lhs.getAsRegion(); - assert(LeftMR && "MemRegionKind SVal doesn't have a region!"); + assert(LeftMR && "MemRegionValKind SVal doesn't have a region!"); const MemRegion *RightMR = rhs.getAsRegion(); if (!RightMR) diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/Store.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/Store.cpp index 7cdb55a..de29f0e 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/Store.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/Store.cpp @@ -100,7 +100,7 @@ const MemRegion *StoreManager::castRegion(const MemRegion *R, QualType CastToTy) // Process region cast according to the kind of the region being cast. switch (R->getKind()) { case MemRegion::CXXThisRegionKind: - case MemRegion::GenericMemSpaceRegionKind: + case MemRegion::CodeSpaceRegionKind: case MemRegion::StackLocalsSpaceRegionKind: case MemRegion::StackArgumentsSpaceRegionKind: case MemRegion::HeapSpaceRegionKind: @@ -112,8 +112,8 @@ const MemRegion *StoreManager::castRegion(const MemRegion *R, QualType CastToTy) llvm_unreachable("Invalid region cast"); } - case MemRegion::FunctionTextRegionKind: - case MemRegion::BlockTextRegionKind: + case MemRegion::FunctionCodeRegionKind: + case MemRegion::BlockCodeRegionKind: case MemRegion::BlockDataRegionKind: case MemRegion::StringRegionKind: // FIXME: Need to handle arbitrary downcasts. 
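The weak-function special case in SimpleSValBuilder::evalCastFromLoc above exists because a weak symbol may resolve to null at load time, so a weak function's address cannot be constant-folded to true. Illustrative:

    extern void maybe_present(void) __attribute__((weak));
    void call_if_linked(void) {
      if (maybe_present)   // genuinely unknown: null if the symbol is absent
        maybe_present();
    }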
@@ -393,7 +393,7 @@ SVal StoreManager::getLValueFieldOrIvar(const Decl *D, SVal Base) { const MemRegion* BaseR = nullptr; switch (BaseL.getSubKind()) { - case loc::MemRegionKind: + case loc::MemRegionValKind: BaseR = BaseL.castAs<loc::MemRegionVal>().getRegion(); break; diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp index 99b2e14..2dd252c 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -115,22 +115,22 @@ void SymExpr::symbol_iterator::expand() { const SymExpr *SE = itr.pop_back_val(); switch (SE->getKind()) { - case SymExpr::RegionValueKind: - case SymExpr::ConjuredKind: - case SymExpr::DerivedKind: - case SymExpr::ExtentKind: - case SymExpr::MetadataKind: + case SymExpr::SymbolRegionValueKind: + case SymExpr::SymbolConjuredKind: + case SymExpr::SymbolDerivedKind: + case SymExpr::SymbolExtentKind: + case SymExpr::SymbolMetadataKind: return; - case SymExpr::CastSymbolKind: + case SymExpr::SymbolCastKind: itr.push_back(cast<SymbolCast>(SE)->getOperand()); return; - case SymExpr::SymIntKind: + case SymExpr::SymIntExprKind: itr.push_back(cast<SymIntExpr>(SE)->getLHS()); return; - case SymExpr::IntSymKind: + case SymExpr::IntSymExprKind: itr.push_back(cast<IntSymExpr>(SE)->getRHS()); return; - case SymExpr::SymSymKind: { + case SymExpr::SymSymExprKind: { const SymSymExpr *x = cast<SymSymExpr>(SE); itr.push_back(x->getLHS()); itr.push_back(x->getRHS()); @@ -458,35 +458,35 @@ bool SymbolReaper::isLive(SymbolRef sym) { bool KnownLive; switch (sym->getKind()) { - case SymExpr::RegionValueKind: + case SymExpr::SymbolRegionValueKind: KnownLive = isLiveRegion(cast<SymbolRegionValue>(sym)->getRegion()); break; - case SymExpr::ConjuredKind: + case SymExpr::SymbolConjuredKind: KnownLive = false; break; - case SymExpr::DerivedKind: + case SymExpr::SymbolDerivedKind: KnownLive = isLive(cast<SymbolDerived>(sym)->getParentSymbol()); break; - case SymExpr::ExtentKind: + case SymExpr::SymbolExtentKind: KnownLive = isLiveRegion(cast<SymbolExtent>(sym)->getRegion()); break; - case SymExpr::MetadataKind: + case SymExpr::SymbolMetadataKind: KnownLive = MetadataInUse.count(sym) && isLiveRegion(cast<SymbolMetadata>(sym)->getRegion()); if (KnownLive) MetadataInUse.erase(sym); break; - case SymExpr::SymIntKind: + case SymExpr::SymIntExprKind: KnownLive = isLive(cast<SymIntExpr>(sym)->getLHS()); break; - case SymExpr::IntSymKind: + case SymExpr::IntSymExprKind: KnownLive = isLive(cast<IntSymExpr>(sym)->getRHS()); break; - case SymExpr::SymSymKind: + case SymExpr::SymSymExprKind: KnownLive = isLive(cast<SymSymExpr>(sym)->getLHS()) && isLive(cast<SymSymExpr>(sym)->getRHS()); break; - case SymExpr::CastSymbolKind: + case SymExpr::SymbolCastKind: KnownLive = isLive(cast<SymbolCast>(sym)->getOperand()); break; } diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp index bf85c4c..d144685 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp @@ -496,10 +496,11 @@ void AnalysisConsumer::HandleDeclsCallGraph(const unsigned LocalTUDeclsSize) { (Mgr->options.InliningMode == All ? nullptr : &VisitedCallees)); // Add the visited callees to the global visited set. 
- for (SetOfConstDecls::iterator I = VisitedCallees.begin(), - E = VisitedCallees.end(); I != E; ++I) { - Visited.insert(*I); - } + for (const Decl *Callee : VisitedCallees) + // Decls from CallGraph are already canonical. But Decls coming from + // CallExprs may not be. We should canonicalize them manually. + Visited.insert(isa<ObjCMethodDecl>(Callee) ? Callee + : Callee->getCanonicalDecl()); VisitedAsTopLevel.insert(D); } } diff --git a/contrib/llvm/tools/lldb/include/lldb/Core/LoadedModuleInfoList.h b/contrib/llvm/tools/lldb/include/lldb/Core/LoadedModuleInfoList.h new file mode 100644 index 0000000..6ba5c28 --- /dev/null +++ b/contrib/llvm/tools/lldb/include/lldb/Core/LoadedModuleInfoList.h @@ -0,0 +1,152 @@ +//===-- LoadedModuleInfoList.h ----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef liblldb_LoadedModuleInfoList_h_ +#define liblldb_LoadedModuleInfoList_h_ + +// C Includes + +// C++ Includes +#include <vector> + +// Other libraries and framework includes +#include "lldb/lldb-private-forward.h" + +namespace lldb_private { +class LoadedModuleInfoList +{ +public: + + class LoadedModuleInfo + { + public: + + enum e_data_point + { + e_has_name = 0, + e_has_base , + e_has_dynamic , + e_has_link_map , + e_num + }; + + LoadedModuleInfo () + { + for (uint32_t i = 0; i < e_num; ++i) + m_has[i] = false; + }; + + void set_name (const std::string & name) + { + m_name = name; + m_has[e_has_name] = true; + } + bool get_name (std::string & out) const + { + out = m_name; + return m_has[e_has_name]; + } + + void set_base (const lldb::addr_t base) + { + m_base = base; + m_has[e_has_base] = true; + } + bool get_base (lldb::addr_t & out) const + { + out = m_base; + return m_has[e_has_base]; + } + + void set_base_is_offset (bool is_offset) + { + m_base_is_offset = is_offset; + } + bool get_base_is_offset(bool & out) const + { + out = m_base_is_offset; + return m_has[e_has_base]; + } + + void set_link_map (const lldb::addr_t addr) + { + m_link_map = addr; + m_has[e_has_link_map] = true; + } + bool get_link_map (lldb::addr_t & out) const + { + out = m_link_map; + return m_has[e_has_link_map]; + } + + void set_dynamic (const lldb::addr_t addr) + { + m_dynamic = addr; + m_has[e_has_dynamic] = true; + } + bool get_dynamic (lldb::addr_t & out) const + { + out = m_dynamic; + return m_has[e_has_dynamic]; + } + + bool has_info (e_data_point datum) const + { + assert (datum < e_num); + return m_has[datum]; + } + + bool + operator == (LoadedModuleInfo const &rhs) const + { + if (e_num != rhs.e_num) + return false; + + for (size_t i = 0; i < e_num; ++i) + { + if (m_has[i] != rhs.m_has[i]) + return false; + } + + return (m_base == rhs.m_base) && + (m_link_map == rhs.m_link_map) && + (m_dynamic == rhs.m_dynamic) && + (m_name == rhs.m_name); + } + protected: + + bool m_has[e_num]; + std::string m_name; + lldb::addr_t m_link_map; + lldb::addr_t m_base; + bool m_base_is_offset; + lldb::addr_t m_dynamic; + }; + + LoadedModuleInfoList () + : m_list () + , m_link_map (LLDB_INVALID_ADDRESS) + {} + + void add (const LoadedModuleInfo & mod) + { + m_list.push_back (mod); + } + + void clear () + { + m_list.clear (); + } + + std::vector<LoadedModuleInfo> m_list; + lldb::addr_t m_link_map; +}; +} // namespace lldb_private + +#endif // liblldb_LoadedModuleInfoList_h_ diff --git
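Note: the LoadedModuleInfoList header introduced above is a presence-tracked record type: each setter flips the matching m_has[] flag, and each getter returns that flag so callers can distinguish "never set" from a zero value. A brief usage sketch under those assumptions (illustrative only, not code from this change):

    #include "lldb/Core/LoadedModuleInfoList.h"

    void example()
    {
        lldb_private::LoadedModuleInfoList list;
        lldb_private::LoadedModuleInfoList::LoadedModuleInfo mod;

        mod.set_name("libfoo.so");   // hypothetical module
        mod.set_base(0x40000000);    // load address from the remote stub
        list.add(mod);

        lldb::addr_t base;
        if (mod.get_base(base))      // true: set_base() ran above
        {
            // base == 0x40000000 here
        }

        lldb::addr_t link_map;
        bool known = mod.get_link_map(link_map);  // false: never set
        (void)known;
    }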
a/contrib/llvm/tools/lldb/include/lldb/Interpreter/CommandReturnObject.h b/contrib/llvm/tools/lldb/include/lldb/Interpreter/CommandReturnObject.h index 424ac80..e3a1d9f 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Interpreter/CommandReturnObject.h +++ b/contrib/llvm/tools/lldb/include/lldb/Interpreter/CommandReturnObject.h @@ -169,6 +169,18 @@ public: void SetInteractive (bool b); + + bool + GetAbnormalStopWasExpected() const + { + return m_abnormal_stop_was_expected; + } + + void + SetAbnormalStopWasExpected(bool signal_was_expected) + { + m_abnormal_stop_was_expected = signal_was_expected; + } private: enum @@ -182,7 +194,13 @@ private: lldb::ReturnStatus m_status; bool m_did_change_process_state; - bool m_interactive; // If true, then the input handle from the debugger will be hooked up + bool m_interactive; // If true, then the input handle from the debugger will be hooked up + bool m_abnormal_stop_was_expected; // This is to support eHandleCommandFlagStopOnCrash vs. attach. + // The attach command often ends up with the process stopped due to a signal. + // Normally that would mean stop on crash should halt batch execution, but we + // obviously don't want that for attach. Using this flag, the attach command + // (and anything else for which this is relevant) can say that the signal is + // expected, and batch command execution can continue. }; } // namespace lldb_private diff --git a/contrib/llvm/tools/lldb/include/lldb/Symbol/ClangASTContext.h b/contrib/llvm/tools/lldb/include/lldb/Symbol/ClangASTContext.h index 0314ce0..bd3a113 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Symbol/ClangASTContext.h +++ b/contrib/llvm/tools/lldb/include/lldb/Symbol/ClangASTContext.h @@ -573,6 +573,9 @@ public: ConstString DeclContextGetName (void *opaque_decl_ctx) override; + ConstString + DeclContextGetScopeQualifiedName (void *opaque_decl_ctx) override; + bool DeclContextIsClassMethod (void *opaque_decl_ctx, lldb::LanguageType *language_ptr, diff --git a/contrib/llvm/tools/lldb/include/lldb/Symbol/CompilerDeclContext.h b/contrib/llvm/tools/lldb/include/lldb/Symbol/CompilerDeclContext.h index 70399b2..9135b44 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Symbol/CompilerDeclContext.h +++ b/contrib/llvm/tools/lldb/include/lldb/Symbol/CompilerDeclContext.h @@ -128,6 +128,9 @@ public: ConstString GetName () const; + ConstString + GetScopeQualifiedName() const; + bool IsStructUnionOrClass () const; diff --git a/contrib/llvm/tools/lldb/include/lldb/Symbol/GoASTContext.h b/contrib/llvm/tools/lldb/include/lldb/Symbol/GoASTContext.h index 3de98da..09d79ba 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Symbol/GoASTContext.h +++ b/contrib/llvm/tools/lldb/include/lldb/Symbol/GoASTContext.h @@ -112,6 +112,12 @@ class GoASTContext : public TypeSystem return ConstString(); } + ConstString + DeclContextGetScopeQualifiedName(void *opaque_decl_ctx) override + { + return ConstString(); + } + bool DeclContextIsClassMethod(void *opaque_decl_ctx, lldb::LanguageType *language_ptr, bool *is_instance_method_ptr, ConstString *language_object_name_ptr) override diff --git a/contrib/llvm/tools/lldb/include/lldb/Symbol/SymbolFile.h b/contrib/llvm/tools/lldb/include/lldb/Symbol/SymbolFile.h index e27b32d..fe74ad4 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Symbol/SymbolFile.h +++ b/contrib/llvm/tools/lldb/include/lldb/Symbol/SymbolFile.h @@ -144,6 +144,7 @@ public: virtual uint32_t FindTypes (const SymbolContext& sc, const ConstString &name, const CompilerDeclContext *parent_decl_ctx, bool append, uint32_t
max_matches, TypeMap& types); virtual size_t FindTypes (const std::vector<CompilerContext> &context, bool append, TypeMap& types); + virtual void GetMangledNamesForFunction(const std::string &scope_qualified_name, std::vector<ConstString> &mangled_names); // virtual uint32_t FindTypes (const SymbolContext& sc, const RegularExpression& regex, bool append, uint32_t max_matches, TypeList& types) = 0; virtual TypeList * GetTypeList (); virtual size_t GetTypes (lldb_private::SymbolContextScope *sc_scope, diff --git a/contrib/llvm/tools/lldb/include/lldb/Symbol/TypeSystem.h b/contrib/llvm/tools/lldb/include/lldb/Symbol/TypeSystem.h index d367bcd..9b43b9d 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Symbol/TypeSystem.h +++ b/contrib/llvm/tools/lldb/include/lldb/Symbol/TypeSystem.h @@ -151,6 +151,9 @@ public: virtual ConstString DeclContextGetName (void *opaque_decl_ctx) = 0; + virtual ConstString + DeclContextGetScopeQualifiedName (void *opaque_decl_ctx) = 0; + virtual bool DeclContextIsClassMethod (void *opaque_decl_ctx, lldb::LanguageType *language_ptr, diff --git a/contrib/llvm/tools/lldb/include/lldb/Target/CPPLanguageRuntime.h b/contrib/llvm/tools/lldb/include/lldb/Target/CPPLanguageRuntime.h index ac537d0..788f4e6 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Target/CPPLanguageRuntime.h +++ b/contrib/llvm/tools/lldb/include/lldb/Target/CPPLanguageRuntime.h @@ -42,9 +42,6 @@ public: bool GetObjectDescription(Stream &str, Value &value, ExecutionContextScope *exe_scope) override; - virtual size_t - GetAlternateManglings(const ConstString &mangled, std::vector<ConstString> &alternates) = 0; - protected: //------------------------------------------------------------------ diff --git a/contrib/llvm/tools/lldb/include/lldb/Target/Process.h b/contrib/llvm/tools/lldb/include/lldb/Target/Process.h index 2e063c5..6bb7a3d 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Target/Process.h +++ b/contrib/llvm/tools/lldb/include/lldb/Target/Process.h @@ -30,6 +30,7 @@ #include "lldb/Core/Communication.h" #include "lldb/Core/Error.h" #include "lldb/Core/Event.h" +#include "lldb/Core/LoadedModuleInfoList.h" #include "lldb/Core/ThreadSafeValue.h" #include "lldb/Core/PluginInterface.h" #include "lldb/Core/StructuredData.h" @@ -1152,6 +1153,12 @@ public: return 0; } + virtual size_t + LoadModules (LoadedModuleInfoList &) + { + return 0; + } + protected: virtual JITLoaderList & GetJITLoaders (); @@ -3149,6 +3156,34 @@ public: void ResetImageToken(size_t token); + //------------------------------------------------------------------ + /// Find the next branch instruction to set a breakpoint on + /// + /// When instruction stepping through a source line, instead of + /// stepping through each instruction, we can put a breakpoint on + /// the next branch instruction (within the range of instructions + /// we are stepping through) and continue the process to there, + /// yielding significant performance benefits over instruction + /// stepping. + /// + /// @param[in] default_stop_addr + /// The address of the instruction where lldb would put a + /// breakpoint normally. + /// + /// @param[in] range_bounds + /// The range which the breakpoint must be contained within. + /// Typically a source line. + /// + /// @return + /// The address of the next branch instruction, or the end of + /// the range provided in range_bounds. If there are any + /// problems with the disassembly or getting the instructions, + /// the original default_stop_addr will be returned. 
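Note: the declaration for AdvanceAddressToNextBranchInstruction follows just below; as a caller-side sketch of the optimization the comment above describes (process, next_insn_addr, and line_address_range are assumed names, not code from this change):

    // While stepping a source line, run to the next branch inside the
    // line instead of single-stepping every instruction.
    Address stop_addr =
        process.AdvanceAddressToNextBranchInstruction(next_insn_addr,
                                                      line_address_range);
    // On any disassembly problem, next_insn_addr comes back unchanged,
    // so the caller can always set a one-shot breakpoint at stop_addr
    // and resume.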
+ //------------------------------------------------------------------ + Address + AdvanceAddressToNextBranchInstruction (Address default_stop_addr, + AddressRange range_bounds); + protected: void SetState (lldb::EventSP &event_sp); diff --git a/contrib/llvm/tools/lldb/include/lldb/Target/SystemRuntime.h b/contrib/llvm/tools/lldb/include/lldb/Target/SystemRuntime.h index 54fde88..cefd724 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Target/SystemRuntime.h +++ b/contrib/llvm/tools/lldb/include/lldb/Target/SystemRuntime.h @@ -275,6 +275,23 @@ public: return LLDB_INVALID_ADDRESS; } + + //------------------------------------------------------------------ + /// Retrieve the Queue kind for the queue at a thread's dispatch_qaddr. + /// + /// Retrieve the Queue kind - either eQueueKindSerial or + /// eQueueKindConcurrent, indicating that this queue processes work + /// items serially or concurrently. + /// + /// @return + /// The Queue kind, if it could be read, else eQueueKindUnknown. + //------------------------------------------------------------------ + virtual lldb::QueueKind + GetQueueKind (lldb::addr_t dispatch_qaddr) + { + return lldb::eQueueKindUnknown; + } + //------------------------------------------------------------------ /// Get the pending work items for a libdispatch Queue /// diff --git a/contrib/llvm/tools/lldb/include/lldb/Target/Thread.h b/contrib/llvm/tools/lldb/include/lldb/Target/Thread.h index 7aff77b..ba73e0b 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Target/Thread.h +++ b/contrib/llvm/tools/lldb/include/lldb/Target/Thread.h @@ -367,6 +367,35 @@ public: } //------------------------------------------------------------------ + /// Whether this thread can be associated with a libdispatch queue + /// + /// The Thread may know if it is associated with a libdispatch queue, + /// it may know definitively that it is NOT associated with a libdispatch + /// queue, or it may be unknown whether it is associated with a libdispatch + /// queue. + /// + /// @return + /// eLazyBoolNo if this thread is definitely not associated with a + /// libdispatch queue (e.g. on a non-Darwin system where GCD aka + /// libdispatch is not available). + /// + /// eLazyBoolYes this thread is associated with a libdispatch queue. + /// + /// eLazyBoolCalculate this thread may be associated with a libdispatch + /// queue but the thread doesn't know one way or the other. + //------------------------------------------------------------------ + virtual lldb_private::LazyBool + GetAssociatedWithLibdispatchQueue () + { + return eLazyBoolNo; + } + + virtual void + SetAssociatedWithLibdispatchQueue (lldb_private::LazyBool associated_with_libdispatch_queue) + { + } + + //------------------------------------------------------------------ /// Retrieve the Queue ID for the queue currently using this Thread /// /// If this Thread is doing work on behalf of a libdispatch/GCD queue, @@ -414,6 +443,29 @@ public: } //------------------------------------------------------------------ + /// Retrieve the Queue kind for the queue currently using this Thread + /// + /// If this Thread is doing work on behalf of a libdispatch/GCD queue, + /// retrieve the Queue kind - either eQueueKindSerial or + /// eQueueKindConcurrent, indicating that this queue processes work + /// items serially or concurrently. + /// + /// @return + /// The Queue kind, if the Thread subclass implements this, else + /// eQueueKindUnknown. 
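Note: the Thread::GetQueueKind declaration this comment documents follows just below; together with the SystemRuntime::GetQueueKind hook added above, the intended lookup order can be sketched as follows (the helper name and the fallback policy are assumptions for illustration):

    lldb::QueueKind
    GetEffectiveQueueKind(lldb_private::Thread &thread,
                          lldb_private::SystemRuntime *runtime)
    {
        // Prefer what the thread already knows, e.g. queue data cached
        // from the remote debug stub when the thread was created.
        lldb::QueueKind kind = thread.GetQueueKind();

        // Otherwise ask the SystemRuntime, keyed by the thread's
        // dispatch_qaddr.
        if (kind == lldb::eQueueKindUnknown && runtime)
            kind = runtime->GetQueueKind(
                thread.GetQueueLibdispatchQueueAddress());
        return kind;
    }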
+ //------------------------------------------------------------------ + virtual lldb::QueueKind + GetQueueKind () + { + return lldb::eQueueKindUnknown; + } + + virtual void + SetQueueKind (lldb::QueueKind kind) + { + } + + //------------------------------------------------------------------ /// Retrieve the Queue for this thread, if any. /// /// @return @@ -451,6 +503,30 @@ public: return LLDB_INVALID_ADDRESS; } + virtual void + SetQueueLibdispatchQueueAddress (lldb::addr_t dispatch_queue_t) + { + } + + //------------------------------------------------------------------ + /// Whether this Thread already has all the Queue information cached or not + /// + /// A Thread may be associated with a libdispatch work Queue at a given + /// public stop event. If so, the thread can satisfy requests like + /// GetQueueLibdispatchQueueAddress, GetQueueKind, GetQueueName, and GetQueueID + /// either from information from the remote debug stub when it is initially + /// created, or it can query the SystemRuntime for that information. + /// + /// This method allows the SystemRuntime to discover if a thread has this + /// information already, instead of calling the thread to get the information + /// and having the thread call the SystemRuntime again. + //------------------------------------------------------------------ + virtual bool + ThreadHasQueueInformation () const + { + return false; + } + virtual uint32_t GetStackFrameCount() { @@ -888,6 +964,16 @@ public: /// @param[in] run_vote /// See standard meanings for the stop & run votes in ThreadPlan.h. /// + /// @param[in] continue_to_next_branch + /// Normally this will enqueue a plan that will put a breakpoint on the return address and continue + /// to there. If continue_to_next_branch is true, this is an operation not involving the user -- + /// e.g. stepping "next" in a source line and we instruction stepped into another function -- + /// so instead of putting a breakpoint on the return address, advance the breakpoint to the + /// end of the source line that is doing the call, or until the next flow control instruction. + /// If the return value from the function call is to be retrieved / displayed to the user, you must stop + /// on the return address. The return value may be stored in volatile registers which are overwritten + /// before the next branch instruction. + /// /// @return /// A shared pointer to the newly queued thread plan, or nullptr if the plan could not be queued.
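Note: the amended QueueThreadPlanForStepOut signature follows just below; a usage sketch of the new flag (the leading arguments are abbreviated with assumed names, since this hunk does not show them):

    // A "next" over a call that accidentally instruction-stepped into the
    // callee: step back out, but continue to the end of the calling line
    // or the next flow-control instruction rather than stopping at the
    // raw return address (safe here because no return value is shown).
    lldb::ThreadPlanSP plan_sp = thread.QueueThreadPlanForStepOut(
        abort_other_plans, addr_context, first_insn,
        stop_other_threads, eVoteYes, eVoteNoOpinion,
        frame_idx, /*continue_to_next_branch=*/true);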
//------------------------------------------------------------------ @@ -898,7 +984,8 @@ public: bool stop_other_threads, Vote stop_vote, // = eVoteYes, Vote run_vote, // = eVoteNoOpinion); - uint32_t frame_idx); + uint32_t frame_idx, + bool continue_to_next_branch = false); //------------------------------------------------------------------ /// Gets the plan used to step through the code that steps from a function diff --git a/contrib/llvm/tools/lldb/include/lldb/Target/ThreadPlanStepOut.h b/contrib/llvm/tools/lldb/include/lldb/Target/ThreadPlanStepOut.h index ac56963..ccf829f 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Target/ThreadPlanStepOut.h +++ b/contrib/llvm/tools/lldb/include/lldb/Target/ThreadPlanStepOut.h @@ -31,7 +31,8 @@ public: Vote stop_vote, Vote run_vote, uint32_t frame_idx, - LazyBool step_out_avoids_code_without_debug_info); + LazyBool step_out_avoids_code_without_debug_info, + bool continue_to_next_branch = false); ~ThreadPlanStepOut() override; diff --git a/contrib/llvm/tools/lldb/source/Commands/CommandObjectProcess.cpp b/contrib/llvm/tools/lldb/source/Commands/CommandObjectProcess.cpp index b7f894f..a85ea17 100644 --- a/contrib/llvm/tools/lldb/source/Commands/CommandObjectProcess.cpp +++ b/contrib/llvm/tools/lldb/source/Commands/CommandObjectProcess.cpp @@ -550,6 +550,7 @@ protected: result.AppendMessage(stream.GetData()); result.SetStatus (eReturnStatusSuccessFinishNoResult); result.SetDidChangeProcessState (true); + result.SetAbnormalStopWasExpected(true); } else { diff --git a/contrib/llvm/tools/lldb/source/Core/StringList.cpp b/contrib/llvm/tools/lldb/source/Core/StringList.cpp index ce197ac..98a0790 100644 --- a/contrib/llvm/tools/lldb/source/Core/StringList.cpp +++ b/contrib/llvm/tools/lldb/source/Core/StringList.cpp @@ -12,7 +12,6 @@ #include "lldb/Core/StreamString.h" #include "lldb/Host/FileSpec.h" #include "lldb/Core/Log.h" -#include "lldb/Core/StreamString.h" #include <string> diff --git a/contrib/llvm/tools/lldb/source/Host/common/HostInfoBase.cpp b/contrib/llvm/tools/lldb/source/Host/common/HostInfoBase.cpp index 0f4324f..f7ba755 100644 --- a/contrib/llvm/tools/lldb/source/Host/common/HostInfoBase.cpp +++ b/contrib/llvm/tools/lldb/source/Host/common/HostInfoBase.cpp @@ -409,13 +409,13 @@ HostInfoBase::ComputeHostArchitectureSupport(ArchSpec &arch_32, ArchSpec &arch_6 arch_32.SetTriple(triple); break; + case llvm::Triple::aarch64: case llvm::Triple::ppc64: case llvm::Triple::x86_64: arch_64.SetTriple(triple); arch_32.SetTriple(triple.get32BitArchVariant()); break; - case llvm::Triple::aarch64: case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::sparcv9: diff --git a/contrib/llvm/tools/lldb/source/Interpreter/CommandInterpreter.cpp b/contrib/llvm/tools/lldb/source/Interpreter/CommandInterpreter.cpp index b7cc607..fd88f0d 100644 --- a/contrib/llvm/tools/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/contrib/llvm/tools/lldb/source/Interpreter/CommandInterpreter.cpp @@ -3038,7 +3038,10 @@ CommandInterpreter::IOHandlerInputComplete (IOHandler &io_handler, std::string & for (ThreadSP thread_sp : process_sp->GetThreadList().Threads()) { StopReason reason = thread_sp->GetStopReason(); - if (reason == eStopReasonSignal || reason == eStopReasonException || reason == eStopReasonInstrumentation) + if ((reason == eStopReasonSignal + || reason == eStopReasonException + || reason == eStopReasonInstrumentation) + && !result.GetAbnormalStopWasExpected()) { should_stop = true; break; diff --git 
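Note: the CommandObjectProcess.cpp and CommandInterpreter.cpp hunks above cooperate: "process attach" marks its stop as expected, and the batch-execution check now consults that flag. The resulting rule, restated as a small predicate (the helper name is an assumption):

    static bool
    ShouldHaltBatchExecution(lldb::StopReason reason,
                             const lldb_private::CommandReturnObject &result)
    {
        const bool abnormal = reason == lldb::eStopReasonSignal ||
                              reason == lldb::eStopReasonException ||
                              reason == lldb::eStopReasonInstrumentation;
        // An expected abnormal stop, such as the signal that accompanies
        // attaching, no longer halts batch command execution.
        return abnormal && !result.GetAbnormalStopWasExpected();
    }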
a/contrib/llvm/tools/lldb/source/Interpreter/CommandReturnObject.cpp b/contrib/llvm/tools/lldb/source/Interpreter/CommandReturnObject.cpp index 1b54187..b083c7f 100644 --- a/contrib/llvm/tools/lldb/source/Interpreter/CommandReturnObject.cpp +++ b/contrib/llvm/tools/lldb/source/Interpreter/CommandReturnObject.cpp @@ -47,7 +47,8 @@ CommandReturnObject::CommandReturnObject () : m_err_stream (), m_status (eReturnStatusStarted), m_did_change_process_state (false), - m_interactive (true) + m_interactive (true), + m_abnormal_stop_was_expected(false) { } diff --git a/contrib/llvm/tools/lldb/source/Plugins/ABI/SysV-mips64/ABISysV_mips64.cpp b/contrib/llvm/tools/lldb/source/Plugins/ABI/SysV-mips64/ABISysV_mips64.cpp index bc62c9f..e3da363 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ABI/SysV-mips64/ABISysV_mips64.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/ABI/SysV-mips64/ABISysV_mips64.cpp @@ -206,6 +206,7 @@ ABISysV_mips64::PrepareTrivialCall (Thread &thread, const RegisterInfo *pc_reg_info = reg_ctx->GetRegisterInfo (eRegisterKindGeneric, LLDB_REGNUM_GENERIC_PC); const RegisterInfo *sp_reg_info = reg_ctx->GetRegisterInfo (eRegisterKindGeneric, LLDB_REGNUM_GENERIC_SP); const RegisterInfo *ra_reg_info = reg_ctx->GetRegisterInfo (eRegisterKindGeneric, LLDB_REGNUM_GENERIC_RA); + const RegisterInfo *r25_info = reg_ctx->GetRegisterInfoByName("r25", 0); if (log) log->Printf("Writing SP: 0x%" PRIx64, (uint64_t)sp); @@ -228,6 +229,13 @@ ABISysV_mips64::PrepareTrivialCall (Thread &thread, if (!reg_ctx->WriteRegisterFromUnsigned (pc_reg_info, func_addr)) return false; + if (log) + log->Printf("Writing r25: 0x%" PRIx64, (uint64_t)func_addr); + + // All callers of position independent functions must place the address of the called function in t9 (r25) + if (!reg_ctx->WriteRegisterFromUnsigned (r25_info, func_addr)) + return false; + return true; } diff --git a/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp b/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp index 09e874a..443f97e 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp @@ -107,6 +107,7 @@ DYLDRendezvous::DYLDRendezvous(Process *process) m_rendezvous_addr(LLDB_INVALID_ADDRESS), m_current(), m_previous(), + m_loaded_modules(), m_soentries(), m_added_soentries(), m_removed_soentries() @@ -181,6 +182,9 @@ DYLDRendezvous::Resolve() m_previous = m_current; m_current = info; + if (UpdateSOEntries (true)) + return true; + return UpdateSOEntries(); } @@ -191,18 +195,23 @@ DYLDRendezvous::IsValid() } bool -DYLDRendezvous::UpdateSOEntries() +DYLDRendezvous::UpdateSOEntries(bool fromRemote) { SOEntry entry; + LoadedModuleInfoList module_list; - if (m_current.map_addr == 0) + // If we can't get the SO info from the remote, return failure. + if (fromRemote && m_process->LoadModules (module_list) == 0) + return false; + + if (!fromRemote && m_current.map_addr == 0) return false; // When the previous and current states are consistent this is the first // time we have been asked to update. Just take a snapshot of the currently // loaded modules. - if (m_previous.state == eConsistent && m_current.state == eConsistent) - return TakeSnapshot(m_soentries); + if (m_previous.state == eConsistent && m_current.state == eConsistent) + return fromRemote ? 
SaveSOEntriesFromRemote(module_list) : TakeSnapshot(m_soentries); // If we are about to add or remove a shared object clear out the current // state and take a snapshot of the currently loaded images. @@ -215,6 +224,9 @@ DYLDRendezvous::UpdateSOEntries() return false; m_soentries.clear(); + if (fromRemote) + return SaveSOEntriesFromRemote(module_list); + m_added_soentries.clear(); m_removed_soentries.clear(); return TakeSnapshot(m_soentries); @@ -224,15 +236,133 @@ DYLDRendezvous::UpdateSOEntries() // Otherwise check the previous state to determine what to expect and update // accordingly. if (m_previous.state == eAdd) - return UpdateSOEntriesForAddition(); + return fromRemote ? AddSOEntriesFromRemote(module_list) : AddSOEntries(); else if (m_previous.state == eDelete) - return UpdateSOEntriesForDeletion(); + return fromRemote ? RemoveSOEntriesFromRemote(module_list) : RemoveSOEntries(); return false; } - + bool -DYLDRendezvous::UpdateSOEntriesForAddition() +DYLDRendezvous::FillSOEntryFromModuleInfo (LoadedModuleInfoList::LoadedModuleInfo const & modInfo, + SOEntry &entry) +{ + addr_t link_map_addr; + addr_t base_addr; + addr_t dyn_addr; + std::string name; + + if (!modInfo.get_link_map (link_map_addr) || + !modInfo.get_base (base_addr) || + !modInfo.get_dynamic (dyn_addr) || + !modInfo.get_name (name)) + return false; + + entry.link_addr = link_map_addr; + entry.base_addr = base_addr; + entry.dyn_addr = dyn_addr; + + entry.file_spec.SetFile(name, false); + + UpdateBaseAddrIfNecessary(entry, name); + + // not needed if we're using ModuleInfos + entry.next = 0; + entry.prev = 0; + entry.path_addr = 0; + + return true; +} + +bool +DYLDRendezvous::SaveSOEntriesFromRemote(LoadedModuleInfoList &module_list) +{ + for (auto const & modInfo : module_list.m_list) + { + SOEntry entry; + if (!FillSOEntryFromModuleInfo(modInfo, entry)) + return false; + + // Only add shared libraries and not the executable. + if (!SOEntryIsMainExecutable(entry)) + m_soentries.push_back(entry); + } + + m_loaded_modules = module_list; + return true; + +} + +bool +DYLDRendezvous::AddSOEntriesFromRemote(LoadedModuleInfoList &module_list) +{ + for (auto const & modInfo : module_list.m_list) + { + bool found = false; + for (auto const & existing : m_loaded_modules.m_list) + { + if (modInfo == existing) + { + found = true; + break; + } + } + + if (found) + continue; + + SOEntry entry; + if (!FillSOEntryFromModuleInfo(modInfo, entry)) + return false; + + // Only add shared libraries and not the executable. + if (!SOEntryIsMainExecutable(entry)) + m_soentries.push_back(entry); + } + + m_loaded_modules = module_list; + return true; +} + +bool +DYLDRendezvous::RemoveSOEntriesFromRemote(LoadedModuleInfoList &module_list) +{ + for (auto const & existing : m_loaded_modules.m_list) + { + bool found = false; + for (auto const & modInfo : module_list.m_list) + { + if (modInfo == existing) + { + found = true; + break; + } + } + + if (found) + continue; + + SOEntry entry; + if (!FillSOEntryFromModuleInfo(existing, entry)) + return false; + + // Only add shared libraries and not the executable. 
+ if (!SOEntryIsMainExecutable(entry)) + { + auto pos = std::find(m_soentries.begin(), m_soentries.end(), entry); + if (pos == m_soentries.end()) + return false; + + m_soentries.erase(pos); + } + } + + m_loaded_modules = module_list; + return true; +} + +bool +DYLDRendezvous::AddSOEntries() { SOEntry entry; iterator pos; @@ -263,7 +393,7 @@ DYLDRendezvous::UpdateSOEntriesForAddition() } bool -DYLDRendezvous::UpdateSOEntriesForDeletion() +DYLDRendezvous::RemoveSOEntries() { SOEntryList entry_list; iterator pos; @@ -291,7 +421,8 @@ DYLDRendezvous::SOEntryIsMainExecutable(const SOEntry &entry) // FreeBSD and on Android it is the full path to the executable. auto triple = m_process->GetTarget().GetArchitecture().GetTriple(); - switch (triple.getOS()) { + switch (triple.getOS()) + { case llvm::Triple::FreeBSD: return entry.file_spec == m_exe_file_spec; case llvm::Triple::Linux: @@ -386,6 +517,21 @@ isLoadBiasIncorrect(Target& target, const std::string& file_path) return false; } +void +DYLDRendezvous::UpdateBaseAddrIfNecessary(SOEntry &entry, std::string const &file_path) +{ + // If the load bias reported by the linker is incorrect then fetch the load address of the file + // from the proc file system. + if (isLoadBiasIncorrect(m_process->GetTarget(), file_path)) + { + lldb::addr_t load_addr = LLDB_INVALID_ADDRESS; + bool is_loaded = false; + Error error = m_process->GetFileLoadAddress(entry.file_spec, is_loaded, load_addr); + if (error.Success() && is_loaded) + entry.base_addr = load_addr; + } +} + bool DYLDRendezvous::ReadSOEntryFromMemory(lldb::addr_t addr, SOEntry &entry) { @@ -427,16 +573,7 @@ DYLDRendezvous::ReadSOEntryFromMemory(lldb::addr_t addr, SOEntry &entry) std::string file_path = ReadStringFromMemory(entry.path_addr); entry.file_spec.SetFile(file_path, false); - // If the load bias reported by the linker is incorrect then fetch the load address of the file - // from the proc file system. - if (isLoadBiasIncorrect(m_process->GetTarget(), file_path)) - { - lldb::addr_t load_addr = LLDB_INVALID_ADDRESS; - bool is_loaded = false; - Error error = m_process->GetFileLoadAddress(entry.file_spec, is_loaded, load_addr); - if (error.Success() && is_loaded) - entry.base_addr = load_addr; - } + UpdateBaseAddrIfNecessary(entry, file_path); return true; } diff --git a/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.h b/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.h index ec5af94..8498116 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.h +++ b/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.h @@ -20,6 +20,10 @@ #include "lldb/lldb-types.h" #include "lldb/Host/FileSpec.h" +#include "lldb/Core/LoadedModuleInfoList.h" + +using lldb_private::LoadedModuleInfoList; + namespace lldb_private { class Process; } @@ -201,6 +205,9 @@ protected: Rendezvous m_current; Rendezvous m_previous; + /// List of currently loaded SO modules + LoadedModuleInfoList m_loaded_modules; + /// List of SOEntry objects corresponding to the current link map state. SOEntryList m_soentries; @@ -240,13 +247,29 @@ protected: /// Updates the current set of SOEntries, the set of added entries, and the /// set of removed entries. 
bool - UpdateSOEntries(); + UpdateSOEntries(bool fromRemote = false); + + bool + FillSOEntryFromModuleInfo (LoadedModuleInfoList::LoadedModuleInfo const & modInfo, + SOEntry &entry); + + bool + SaveSOEntriesFromRemote(LoadedModuleInfoList &module_list); bool - UpdateSOEntriesForAddition(); + AddSOEntriesFromRemote(LoadedModuleInfoList &module_list); bool - UpdateSOEntriesForDeletion(); + RemoveSOEntriesFromRemote(LoadedModuleInfoList &module_list); + + bool + AddSOEntries(); + + bool + RemoveSOEntries(); + + void + UpdateBaseAddrIfNecessary(SOEntry &entry, std::string const &file_path); bool SOEntryIsMainExecutable(const SOEntry &entry); diff --git a/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp index c9bc4b6..f4d6b19 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp @@ -36,6 +36,7 @@ #include "lldb/Symbol/Function.h" #include "lldb/Symbol/ObjectFile.h" #include "lldb/Symbol/SymbolContext.h" +#include "lldb/Symbol/SymbolFile.h" #include "lldb/Symbol/SymbolVendor.h" #include "lldb/Symbol/Type.h" #include "lldb/Symbol/TypeList.h" @@ -570,6 +571,63 @@ FindCodeSymbolInContext } } +ConstString +FindBestAlternateMangledName +( + const ConstString &demangled, + const LanguageType &lang_type, + SymbolContext &sym_ctx +) +{ + CPlusPlusLanguage::MethodName cpp_name(demangled); + std::string scope_qualified_name = cpp_name.GetScopeQualifiedName(); + + if (!scope_qualified_name.size()) + return ConstString(); + + if (!sym_ctx.module_sp) + return ConstString(); + + SymbolVendor *sym_vendor = sym_ctx.module_sp->GetSymbolVendor(); + if (!sym_vendor) + return ConstString(); + + lldb_private::SymbolFile *sym_file = sym_vendor->GetSymbolFile(); + if (!sym_file) + return ConstString(); + + std::vector<ConstString> alternates; + sym_file->GetMangledNamesForFunction(scope_qualified_name, alternates); + + std::vector<ConstString> param_and_qual_matches; + std::vector<ConstString> param_matches; + for (size_t i = 0; i < alternates.size(); i++) + { + ConstString alternate_mangled_name = alternates[i]; + Mangled mangled(alternate_mangled_name, true); + ConstString demangled = mangled.GetDemangledName(lang_type); + + CPlusPlusLanguage::MethodName alternate_cpp_name(demangled); + if (!cpp_name.IsValid()) + continue; + + if (alternate_cpp_name.GetArguments() == cpp_name.GetArguments()) + { + if (alternate_cpp_name.GetQualifiers() == cpp_name.GetQualifiers()) + param_and_qual_matches.push_back(alternate_mangled_name); + else + param_matches.push_back(alternate_mangled_name); + } + } + + if (param_and_qual_matches.size()) + return param_and_qual_matches[0]; // It is assumed that there will be only one! + else if (param_matches.size()) + return param_matches[0]; // Return one of them as a best match + else + return ConstString(); +} + bool ClangExpressionDeclMap::GetFunctionAddress ( @@ -603,15 +661,25 @@ ClangExpressionDeclMap::GetFunctionAddress if (Language::LanguageIsCPlusPlus(lang_type) && CPlusPlusLanguage::IsCPPMangledName(name.AsCString())) { - // 1. 
Demangle the name Mangled mangled(name, true); ConstString demangled = mangled.GetDemangledName(lang_type); if (demangled) { - FindCodeSymbolInContext( - demangled, m_parser_vars->m_sym_ctx, eFunctionNameTypeFull, sc_list); - sc_list_size = sc_list.GetSize(); + ConstString best_alternate_mangled_name = FindBestAlternateMangledName(demangled, lang_type, sc); + if (best_alternate_mangled_name) + { + FindCodeSymbolInContext( + best_alternate_mangled_name, m_parser_vars->m_sym_ctx, eFunctionNameTypeAuto, sc_list); + sc_list_size = sc_list.GetSize(); + } + + if (sc_list_size == 0) + { + FindCodeSymbolInContext( + demangled, m_parser_vars->m_sym_ctx, eFunctionNameTypeFull, sc_list); + sc_list_size = sc_list.GetSize(); + } } } } diff --git a/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp b/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp index 37b7bd1d..509c594 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp @@ -36,7 +36,6 @@ #include "lldb/Host/Endian.h" #include "lldb/Symbol/ClangASTContext.h" #include "lldb/Symbol/CompilerType.h" -#include "lldb/Target/CPPLanguageRuntime.h" #include <map> @@ -230,36 +229,6 @@ IRForTarget::GetFunctionAddress (llvm::Function *fun, { std::vector<lldb_private::ConstString> alternates; bool found_it = m_decl_map->GetFunctionAddress (name, fun_addr); - if (!found_it) - { - if (log) - log->Printf("Address of function \"%s\" not found.\n", name.GetCString()); - // Check for an alternate mangling for names from the standard library. - // For example, "std::basic_string<...>" has an alternate mangling scheme per - // the Itanium C++ ABI. - lldb::ProcessSP process_sp = m_data_allocator.GetTarget()->GetProcessSP(); - if (process_sp) - { - lldb_private::CPPLanguageRuntime *cpp_runtime = process_sp->GetCPPLanguageRuntime(); - if (cpp_runtime && cpp_runtime->GetAlternateManglings(name, alternates)) - { - for (size_t i = 0; i < alternates.size(); ++i) - { - const lldb_private::ConstString &alternate_name = alternates[i]; - if (log) - log->Printf("Looking up address of function \"%s\" with alternate name \"%s\"", - name.GetCString(), alternate_name.GetCString()); - if ((found_it = m_decl_map->GetFunctionAddress (alternate_name, fun_addr))) - { - if (log) - log->Printf("Found address of function \"%s\" with alternate name \"%s\"", - name.GetCString(), alternate_name.GetCString()); - break; - } - } - } - } - } if (!found_it) { diff --git a/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp b/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp index 28eba09..9c09d38 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp @@ -549,45 +549,45 @@ EmulateInstructionMIPS64::GetOpcodeForInstruction (const char *op_name) //---------------------------------------------------------------------- // Branch instructions //---------------------------------------------------------------------- - { "BEQ", &EmulateInstructionMIPS64::Emulate_BEQ, "BEQ rs,rt,offset" }, - { "BNE", &EmulateInstructionMIPS64::Emulate_BNE, "BNE rs,rt,offset" }, - { "BEQL", &EmulateInstructionMIPS64::Emulate_BEQL, "BEQL rs,rt,offset" }, - { "BNEL", &EmulateInstructionMIPS64::Emulate_BNEL, "BNEL rs,rt,offset" }, - { "BGEZALL", 
&EmulateInstructionMIPS64::Emulate_BGEZALL, "BGEZALL rt,offset" }, + { "BEQ", &EmulateInstructionMIPS64::Emulate_BXX_3ops, "BEQ rs,rt,offset" }, + { "BNE", &EmulateInstructionMIPS64::Emulate_BXX_3ops, "BNE rs,rt,offset" }, + { "BEQL", &EmulateInstructionMIPS64::Emulate_BXX_3ops, "BEQL rs,rt,offset" }, + { "BNEL", &EmulateInstructionMIPS64::Emulate_BXX_3ops, "BNEL rs,rt,offset" }, + { "BGEZALL", &EmulateInstructionMIPS64::Emulate_Bcond_Link, "BGEZALL rt,offset" }, { "BAL", &EmulateInstructionMIPS64::Emulate_BAL, "BAL offset" }, - { "BGEZAL", &EmulateInstructionMIPS64::Emulate_BGEZAL, "BGEZAL rs,offset" }, + { "BGEZAL", &EmulateInstructionMIPS64::Emulate_Bcond_Link, "BGEZAL rs,offset" }, { "BALC", &EmulateInstructionMIPS64::Emulate_BALC, "BALC offset" }, { "BC", &EmulateInstructionMIPS64::Emulate_BC, "BC offset" }, - { "BGEZ", &EmulateInstructionMIPS64::Emulate_BGEZ, "BGEZ rs,offset" }, - { "BLEZALC", &EmulateInstructionMIPS64::Emulate_BLEZALC, "BLEZALC rs,offset" }, - { "BGEZALC", &EmulateInstructionMIPS64::Emulate_BGEZALC, "BGEZALC rs,offset" }, - { "BLTZALC", &EmulateInstructionMIPS64::Emulate_BLTZALC, "BLTZALC rs,offset" }, - { "BGTZALC", &EmulateInstructionMIPS64::Emulate_BGTZALC, "BGTZALC rs,offset" }, - { "BEQZALC", &EmulateInstructionMIPS64::Emulate_BEQZALC, "BEQZALC rs,offset" }, - { "BNEZALC", &EmulateInstructionMIPS64::Emulate_BNEZALC, "BNEZALC rs,offset" }, - { "BEQC", &EmulateInstructionMIPS64::Emulate_BEQC, "BEQC rs,rt,offset" }, - { "BNEC", &EmulateInstructionMIPS64::Emulate_BNEC, "BNEC rs,rt,offset" }, - { "BLTC", &EmulateInstructionMIPS64::Emulate_BLTC, "BLTC rs,rt,offset" }, - { "BGEC", &EmulateInstructionMIPS64::Emulate_BGEC, "BGEC rs,rt,offset" }, - { "BLTUC", &EmulateInstructionMIPS64::Emulate_BLTUC, "BLTUC rs,rt,offset" }, - { "BGEUC", &EmulateInstructionMIPS64::Emulate_BGEUC, "BGEUC rs,rt,offset" }, - { "BLTZC", &EmulateInstructionMIPS64::Emulate_BLTZC, "BLTZC rt,offset" }, - { "BLEZC", &EmulateInstructionMIPS64::Emulate_BLEZC, "BLEZC rt,offset" }, - { "BGEZC", &EmulateInstructionMIPS64::Emulate_BGEZC, "BGEZC rt,offset" }, - { "BGTZC", &EmulateInstructionMIPS64::Emulate_BGTZC, "BGTZC rt,offset" }, - { "BEQZC", &EmulateInstructionMIPS64::Emulate_BEQZC, "BEQZC rt,offset" }, - { "BNEZC", &EmulateInstructionMIPS64::Emulate_BNEZC, "BNEZC rt,offset" }, - { "BGEZL", &EmulateInstructionMIPS64::Emulate_BGEZL, "BGEZL rt,offset" }, - { "BGTZ", &EmulateInstructionMIPS64::Emulate_BGTZ, "BGTZ rt,offset" }, - { "BGTZL", &EmulateInstructionMIPS64::Emulate_BGTZL, "BGTZL rt,offset" }, - { "BLEZ", &EmulateInstructionMIPS64::Emulate_BLEZ, "BLEZ rt,offset" }, - { "BLEZL", &EmulateInstructionMIPS64::Emulate_BLEZL, "BLEZL rt,offset" }, - { "BLTZ", &EmulateInstructionMIPS64::Emulate_BLTZ, "BLTZ rt,offset" }, - { "BLTZAL", &EmulateInstructionMIPS64::Emulate_BLTZAL, "BLTZAL rt,offset" }, - { "BLTZALL", &EmulateInstructionMIPS64::Emulate_BLTZALL, "BLTZALL rt,offset" }, - { "BLTZL", &EmulateInstructionMIPS64::Emulate_BLTZL, "BLTZL rt,offset" }, - { "BOVC", &EmulateInstructionMIPS64::Emulate_BOVC, "BOVC rs,rt,offset" }, - { "BNVC", &EmulateInstructionMIPS64::Emulate_BNVC, "BNVC rs,rt,offset" }, + { "BGEZ", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BGEZ rs,offset" }, + { "BLEZALC", &EmulateInstructionMIPS64::Emulate_Bcond_Link_C,"BLEZALC rs,offset" }, + { "BGEZALC", &EmulateInstructionMIPS64::Emulate_Bcond_Link_C,"BGEZALC rs,offset" }, + { "BLTZALC", &EmulateInstructionMIPS64::Emulate_Bcond_Link_C,"BLTZALC rs,offset" }, + { "BGTZALC", &EmulateInstructionMIPS64::Emulate_Bcond_Link_C,"BGTZALC 
rs,offset" }, + { "BEQZALC", &EmulateInstructionMIPS64::Emulate_Bcond_Link_C,"BEQZALC rs,offset" }, + { "BNEZALC", &EmulateInstructionMIPS64::Emulate_Bcond_Link_C,"BNEZALC rs,offset" }, + { "BEQC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BEQC rs,rt,offset" }, + { "BNEC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BNEC rs,rt,offset" }, + { "BLTC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BLTC rs,rt,offset" }, + { "BGEC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BGEC rs,rt,offset" }, + { "BLTUC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BLTUC rs,rt,offset" }, + { "BGEUC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BGEUC rs,rt,offset" }, + { "BLTZC", &EmulateInstructionMIPS64::Emulate_BXX_2ops_C, "BLTZC rt,offset" }, + { "BLEZC", &EmulateInstructionMIPS64::Emulate_BXX_2ops_C, "BLEZC rt,offset" }, + { "BGEZC", &EmulateInstructionMIPS64::Emulate_BXX_2ops_C, "BGEZC rt,offset" }, + { "BGTZC", &EmulateInstructionMIPS64::Emulate_BXX_2ops_C, "BGTZC rt,offset" }, + { "BEQZC", &EmulateInstructionMIPS64::Emulate_BXX_2ops_C, "BEQZC rt,offset" }, + { "BNEZC", &EmulateInstructionMIPS64::Emulate_BXX_2ops_C, "BNEZC rt,offset" }, + { "BGEZL", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BGEZL rt,offset" }, + { "BGTZ", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BGTZ rt,offset" }, + { "BGTZL", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BGTZL rt,offset" }, + { "BLEZ", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BLEZ rt,offset" }, + { "BLEZL", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BLEZL rt,offset" }, + { "BLTZ", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BLTZ rt,offset" }, + { "BLTZAL", &EmulateInstructionMIPS64::Emulate_Bcond_Link, "BLTZAL rt,offset" }, + { "BLTZALL", &EmulateInstructionMIPS64::Emulate_Bcond_Link, "BLTZALL rt,offset" }, + { "BLTZL", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BLTZL rt,offset" }, + { "BOVC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BOVC rs,rt,offset" }, + { "BNVC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BNVC rs,rt,offset" }, { "J", &EmulateInstructionMIPS64::Emulate_J, "J target" }, { "JAL", &EmulateInstructionMIPS64::Emulate_JAL, "JAL target" }, { "JALX", &EmulateInstructionMIPS64::Emulate_JAL, "JALX target" }, @@ -597,16 +597,16 @@ EmulateInstructionMIPS64::GetOpcodeForInstruction (const char *op_name) { "JIC", &EmulateInstructionMIPS64::Emulate_JIC, "JIC rt,offset" }, { "JR", &EmulateInstructionMIPS64::Emulate_JR, "JR target" }, { "JR_HB", &EmulateInstructionMIPS64::Emulate_JR, "JR.HB target" }, - { "BC1F", &EmulateInstructionMIPS64::Emulate_BC1F, "BC1F cc, offset" }, - { "BC1T", &EmulateInstructionMIPS64::Emulate_BC1T, "BC1T cc, offset" }, - { "BC1FL", &EmulateInstructionMIPS64::Emulate_BC1FL, "BC1FL cc, offset" }, - { "BC1TL", &EmulateInstructionMIPS64::Emulate_BC1TL, "BC1TL cc, offset" }, + { "BC1F", &EmulateInstructionMIPS64::Emulate_FP_branch, "BC1F cc, offset" }, + { "BC1T", &EmulateInstructionMIPS64::Emulate_FP_branch, "BC1T cc, offset" }, + { "BC1FL", &EmulateInstructionMIPS64::Emulate_FP_branch, "BC1FL cc, offset" }, + { "BC1TL", &EmulateInstructionMIPS64::Emulate_FP_branch, "BC1TL cc, offset" }, { "BC1EQZ", &EmulateInstructionMIPS64::Emulate_BC1EQZ, "BC1EQZ ft, offset" }, { "BC1NEZ", &EmulateInstructionMIPS64::Emulate_BC1NEZ, "BC1NEZ ft, offset" }, - { "BC1ANY2F", &EmulateInstructionMIPS64::Emulate_BC1ANY2F, "BC1ANY2F cc, offset" }, - { "BC1ANY2T", &EmulateInstructionMIPS64::Emulate_BC1ANY2T, "BC1ANY2T cc, offset" }, - { "BC1ANY4F", &EmulateInstructionMIPS64::Emulate_BC1ANY4F, "BC1ANY4F cc, 
offset" }, - { "BC1ANY4T", &EmulateInstructionMIPS64::Emulate_BC1ANY4T, "BC1ANY4T cc, offset" }, + { "BC1ANY2F", &EmulateInstructionMIPS64::Emulate_3D_branch, "BC1ANY2F cc, offset" }, + { "BC1ANY2T", &EmulateInstructionMIPS64::Emulate_3D_branch, "BC1ANY2T cc, offset" }, + { "BC1ANY4F", &EmulateInstructionMIPS64::Emulate_3D_branch, "BC1ANY4F cc, offset" }, + { "BC1ANY4T", &EmulateInstructionMIPS64::Emulate_3D_branch, "BC1ANY4T cc, offset" }, { "BNZ_B", &EmulateInstructionMIPS64::Emulate_BNZB, "BNZ.b wt,s16" }, { "BNZ_H", &EmulateInstructionMIPS64::Emulate_BNZH, "BNZ.h wt,s16" }, { "BNZ_W", &EmulateInstructionMIPS64::Emulate_BNZW, "BNZ.w wt,s16" }, @@ -907,107 +907,20 @@ EmulateInstructionMIPS64::Emulate_LD (llvm::MCInst& insn) return false; } -bool -EmulateInstructionMIPS64::Emulate_BEQ (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target, rs_val, rt_val; - - /* - * BEQ rs, rt, offset - * condition <- (GPR[rs] = GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - * else - * PC = PC + 4 - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val == rt_val) - target = pc + offset; - else - target = pc + 8; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BNE (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target, rs_val, rt_val; - - /* - * BNE rs, rt, offset - * condition <- (GPR[rs] != GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val != rt_val) - target = pc + offset; - else - target = pc + 8; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} +/* + Emulate below MIPS branch instructions. 
+ BEQ, BNE : Branch on condition + BEQL, BNEL : Branch likely +*/ bool -EmulateInstructionMIPS64::Emulate_BEQL (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_BXX_3ops (llvm::MCInst& insn) { bool success = false; uint32_t rs, rt; - int64_t offset, pc, target, rs_val, rt_val; + int64_t offset, pc, rs_val, rt_val, target = 0; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); - /* - * BEQL rs, rt, offset - * condition <- (GPR[rs] = GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); offset = insn.getOperand(2).getImm(); @@ -1024,290 +937,26 @@ EmulateInstructionMIPS64::Emulate_BEQL (llvm::MCInst& insn) if (!success) return false; - if (rs_val == rt_val) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BNEL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target, rs_val, rt_val; - - /* - * BNEL rs, rt, offset - * condition <- (GPR[rs] != GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val != rt_val) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGEZL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGEZL rs, offset - * condition <- (GPR[rs] >= 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val >= 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLTZL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLTZL rs, offset - * condition <- (GPR[rs] < 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = 
insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val < 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGTZL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGTZL rs, offset - * condition <- (GPR[rs] > 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val > 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLEZL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLEZL rs, offset - * condition <- (GPR[rs] <= 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val <= 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGTZ (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGTZ rs, offset - * condition <- (GPR[rs] > 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val > 0) - target = pc + offset; - else - target = pc + 8; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLEZ (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLEZ rs, offset - * condition <- (GPR[rs] <= 0) - * if condition 
then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val <= 0) - target = pc + offset; - else - target = pc + 8; + if (!strcasecmp (op_name, "BEQ") || + !strcasecmp (op_name, "BEQL")) + { + if (rs_val == rt_val) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BNE") || + !strcasecmp (op_name, "BNEL")) + { + if (rs_val != rt_val) + target = pc + offset; + else + target = pc + 8; + } Context context; context.type = eContextRelativeBranchImmediate; + context.SetImmediate (offset); if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) return false; @@ -1315,20 +964,20 @@ EmulateInstructionMIPS64::Emulate_BLEZ (llvm::MCInst& insn) return true; } +/* + Emulate below MIPS Non-Compact conditional branch and link instructions. + BLTZAL, BGEZAL : + BLTZALL, BGEZALL : Branch likely +*/ bool -EmulateInstructionMIPS64::Emulate_BLTZ (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_Bcond_Link (llvm::MCInst& insn) { bool success = false; uint32_t rs; - int64_t offset, pc, target; + int64_t offset, pc, target = 0; int64_t rs_val; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); - /* - * BLTZ rs, offset - * condition <- (GPR[rs] < 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); offset = insn.getOperand(1).getImm(); @@ -1340,52 +989,24 @@ EmulateInstructionMIPS64::Emulate_BLTZ (llvm::MCInst& insn) if (!success) return false; - if (rs_val < 0) - target = pc + offset; - else - target = pc + 8; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGEZALL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGEZALL rt, offset - * condition <- (GPR[rs] >= 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val >= 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ + if (!strcasecmp (op_name, "BLTZAL") || + !strcasecmp (op_name, "BLTZALL")) + { + if (rs_val < 0) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BGEZAL") || + !strcasecmp (op_name, "BGEZALL")) + { + if (rs_val >= 0) + target = pc + offset; + else + target = pc + 8; + } Context context; - context.type = eContextRelativeBranchImmediate; if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) return false; @@ -1458,65 +1079,18 @@ EmulateInstructionMIPS64::Emulate_BALC (llvm::MCInst& insn) return true; } +/* + Emulate below MIPS conditional branch and link instructions. 
+ BLEZALC, BGEZALC, BLTZALC, BGTZALC, BEQZALC, BNEZALC : Compact branches +*/ bool -EmulateInstructionMIPS64::Emulate_BGEZAL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGEZAL rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] >= 0) - * if condition then - * RA = PC + 8 - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - Context context; - - if ((int64_t) rs_val >= 0) - target = pc + offset; - else - target = pc + 8; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 8)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLTZAL (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_Bcond_Link_C (llvm::MCInst& insn) { bool success = false; uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; + int64_t offset, pc, rs_val, target = 0; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); - /* - * BLTZAL rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] < 0) - * if condition then - * RA = PC + 8 - * PC = PC + offset - */ rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); offset = insn.getOperand(1).getImm(); @@ -1528,271 +1102,51 @@ EmulateInstructionMIPS64::Emulate_BLTZAL (llvm::MCInst& insn) if (!success) return false; - Context context; - - if ((int64_t) rs_val < 0) - target = pc + offset; - else - target = pc + 8; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 8)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLTZALL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLTZALL rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] < 0) - * if condition then - * RA = PC + 8 - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - Context context; - - if (rs_val < 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 8)) - return false; - - return true; -} - - -bool -EmulateInstructionMIPS64::Emulate_BLEZALC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLEZALC rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] <= 0) - * if condition then - * RA = PC + 4 - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = 
insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - Context context; - - if (rs_val <= 0) - target = pc + offset; - else - target = pc + 4; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 4)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGEZALC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGEZALC rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] >= 0) - * if condition then - * RA = PC + 4 - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - Context context; - - if (rs_val >= 0) - target = pc + offset; - else - target = pc + 4; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 4)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLTZALC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLTZALC rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] < 0) - * if condition then - * RA = PC + 4 - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - Context context; - - if (rs_val < 0) - target = pc + offset; - else - target = pc + 4; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 4)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGTZALC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGTZALC rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] > 0) - * if condition then - * RA = PC + 4 - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - Context context; - - if (rs_val > 0) - target = pc + offset; - else - target = pc + 4; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 4)) - 
return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BEQZALC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target, rs_val; - - /* - * BEQZALC rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] == 0) - * if condition then - * RA = PC + 4 - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; + if (!strcasecmp (op_name, "BLEZALC")) + { + if (rs_val <= 0) + target = pc + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BGEZALC")) + { + if (rs_val >= 0) + target = pc + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BLTZALC")) + { + if (rs_val < 0) + target = pc + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BGTZALC")) + { + if (rs_val > 0) + target = pc + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BEQZALC")) + { + if (rs_val == 0) + target = pc + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BNEZALC")) + { + if (rs_val != 0) + target = pc + offset; + else + target = pc + 4; + } Context context; - if (rs_val == 0) - target = pc + offset; - else - target = pc + 4; - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) return false; @@ -1802,21 +1156,19 @@ EmulateInstructionMIPS64::Emulate_BEQZALC (llvm::MCInst& insn) return true; } +/* + Emulate below MIPS branch instructions. + BLTZL, BGEZL, BGTZL, BLEZL : Branch likely + BLTZ, BGEZ, BGTZ, BLEZ : Non-compact branches +*/ bool -EmulateInstructionMIPS64::Emulate_BNEZALC (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_BXX_2ops (llvm::MCInst& insn) { bool success = false; uint32_t rs; - int64_t offset, pc, target, rs_val; + int64_t offset, pc, rs_val, target = 0; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); - /* - * BNEZALC rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] != 0) - * if condition then - * RA = PC + 4 - * PC = PC + offset - */ rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); offset = insn.getOperand(1).getImm(); @@ -1828,53 +1180,42 @@ EmulateInstructionMIPS64::Emulate_BNEZALC (llvm::MCInst& insn) if (!success) return false; - Context context; - - if (rs_val != 0) - target = pc + offset; - else - target = pc + 4; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 4)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGEZ (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target, rs_val; - - /* - * BGEZ rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] >= 0) - * if condition then - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; + if (!strcasecmp (op_name, "BLTZL") || 
+ !strcasecmp (op_name, "BLTZ")) + { + if (rs_val < 0) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BGEZL") || + !strcasecmp (op_name, "BGEZ")) + { + if (rs_val >= 0) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BGTZL") || + !strcasecmp (op_name, "BGTZ")) + { + if (rs_val > 0) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BLEZL") || + !strcasecmp (op_name, "BLEZ")) + { + if (rs_val <= 0) + target = pc + offset; + else + target = pc + 8; + } Context context; - - if (rs_val >= 0) - target = pc + offset; - else - target = pc + 8; + context.type = eContextRelativeBranchImmediate; + context.SetImmediate (offset); if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) return false; @@ -1909,502 +1250,6 @@ EmulateInstructionMIPS64::Emulate_BC (llvm::MCInst& insn) return true; } -bool -EmulateInstructionMIPS64::Emulate_BEQC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target, rs_val, rt_val; - - /* - * BEQC rs, rt, offset - * condition <- (GPR[rs] = GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val == rt_val) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BNEC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target, rs_val, rt_val; - - /* - * BNEC rs, rt, offset - * condition <- (GPR[rs] != GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val != rt_val) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLTC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target; - int64_t rs_val, rt_val; - - /* - * BLTC rs, rt, offset - * condition <- (GPR[rs] < GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue 
(insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val < rt_val) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGEC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target; - int64_t rs_val, rt_val; - - /* - * BGEC rs, rt, offset - * condition <- (GPR[rs] > GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val > rt_val) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLTUC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target; - uint64_t rs_val, rt_val; - - /* - * BLTUC rs, rt, offset - * condition <- (GPR[rs] < GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val < rt_val) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGEUC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target; - uint64_t rs_val, rt_val; - - /* - * BGEUC rs, rt, offset - * condition <- (GPR[rs] > GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = 
ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val > rt_val) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLTZC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLTZC rs, offset - * condition <- (GPR[rs] < 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val < 0) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLEZC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLEZC rs, offset - * condition <- (GPR[rs] <= 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val <= 0) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGEZC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGEZC rs, offset - * condition <- (GPR[rs] >= 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val >= 0) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGTZC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGTZC rs, offset - * condition 
<- (GPR[rs] > 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val > 0) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BEQZC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - uint64_t rs_val; - - /* - * BEQZC rs, offset - * condition <- (GPR[rs] = 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val == 0) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BNEZC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - uint64_t rs_val; - - /* - * BNEZC rs, offset - * condition <- (GPR[rs] != 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val != 0) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - static int IsAdd64bitOverflow (int64_t a, int64_t b) { @@ -2412,20 +1257,19 @@ IsAdd64bitOverflow (int64_t a, int64_t b) return (a < 0 && b < 0 && r >= 0) || (a >= 0 && b >= 0 && r < 0); } +/* + Emulate below MIPS branch instructions. 
+ BEQC, BNEC, BLTC, BGEC, BLTUC, BGEUC, BOVC, BNVC: Compact branch instructions with no delay slot +*/ bool -EmulateInstructionMIPS64::Emulate_BOVC (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_BXX_3ops_C (llvm::MCInst& insn) { bool success = false; uint32_t rs, rt; - int64_t offset, pc, target; - int64_t rs_val, rt_val; + int64_t offset, pc, rs_val, rt_val, target = 0; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); + uint32_t current_inst_size = m_insn_info->get(insn.getOpcode()).getSize(); - /* - * BOVC rs, rt, offset - * condition <- overflow(GPR[rs] + GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); offset = insn.getOperand(2).getImm(); @@ -2442,13 +1286,66 @@ EmulateInstructionMIPS64::Emulate_BOVC (llvm::MCInst& insn) if (!success) return false; - if (IsAdd64bitOverflow (rs_val, rt_val)) - target = pc + offset; - else - target = pc + 4; + if (!strcasecmp (op_name, "BEQC")) + { + if (rs_val == rt_val) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BNEC")) + { + if (rs_val != rt_val) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BLTC")) + { + if (rs_val < rt_val) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BGEC")) + { + if (rs_val >= rt_val) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BLTUC")) + { + if ((uint64_t)rs_val < (uint64_t)rt_val) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BGEUC")) + { + if ((uint64_t)rs_val >= (uint64_t)rt_val) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BOVC")) + { + if (IsAdd64bitOverflow (rs_val, rt_val)) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BNVC")) + { + if (!IsAdd64bitOverflow (rs_val, rt_val)) + target = pc + 4 + offset; + else + target = pc + 4; + } Context context; context.type = eContextRelativeBranchImmediate; + context.SetImmediate (current_inst_size + offset); if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) return false; @@ -2456,23 +1353,22 @@ EmulateInstructionMIPS64::Emulate_BOVC (llvm::MCInst& insn) return true; } +/* + Emulate below MIPS branch instructions. 
+ BLTZC, BLEZC, BGEZC, BGTZC, BEQZC, BNEZC : Compact Branches +*/ bool -EmulateInstructionMIPS64::Emulate_BNVC (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_BXX_2ops_C (llvm::MCInst& insn) { bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target; - int64_t rs_val, rt_val; + uint32_t rs; + int64_t offset, pc, target = 0; + int64_t rs_val; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); + uint32_t current_inst_size = m_insn_info->get(insn.getOpcode()).getSize(); - /* - * BNVC rs, rt, offset - * condition <- overflow(GPR[rs] + GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); + offset = insn.getOperand(1).getImm(); pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); if (!success) @@ -2482,17 +1378,52 @@ EmulateInstructionMIPS64::Emulate_BNVC (llvm::MCInst& insn) if (!success) return false; - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (! IsAdd64bitOverflow (rs_val, rt_val)) - target = pc + offset; - else - target = pc + 4; + if (!strcasecmp (op_name, "BLTZC")) + { + if (rs_val < 0) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BLEZC")) + { + if (rs_val <= 0) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BGEZC")) + { + if (rs_val >= 0) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BGTZC")) + { + if (rs_val > 0) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BEQZC")) + { + if (rs_val == 0) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BNEZC")) + { + if (rs_val != 0) + target = pc + 4 + offset; + else + target = pc + 4; + } Context context; context.type = eContextRelativeBranchImmediate; + context.SetImmediate (current_inst_size + offset); if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) return false; @@ -2684,95 +1615,19 @@ EmulateInstructionMIPS64::Emulate_JR (llvm::MCInst& insn) return true; } +/* + Emulate Branch on FP True/False + BC1F, BC1FL : Branch on FP False (L stands for branch likely) + BC1T, BC1TL : Branch on FP True (L stands for branch likely) +*/ bool -EmulateInstructionMIPS64::Emulate_BC1F (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_FP_branch (llvm::MCInst& insn) { bool success = false; uint32_t cc, fcsr; - int64_t target, pc, offset; - - /* - * BC1F cc, offset - * condition <- (FPConditionCode(cc) == 0) - * if condition then - * offset = sign_ext (offset) - * PC = PC + offset - */ - cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - fcsr = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_fcsr_mips64, 0, &success); - if (!success) - return false; - - /* fcsr[23], fcsr[25-31] are vaild condition bits */ - fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); - - if ((fcsr & (1 << cc)) == 0) - target = pc + offset; - else - target = pc + 8; - - Context context; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; + int64_t pc, offset, target = 
0; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BC1T (llvm::MCInst& insn) -{ - bool success = false; - uint32_t cc, fcsr; - int64_t target, pc, offset; - - /* - * BC1T cc, offset - * condition <- (FPConditionCode(cc) != 0) - * if condition then - * offset = sign_ext (offset) - * PC = PC + offset - */ - cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - fcsr = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_fcsr_mips64, 0, &success); - if (!success) - return false; - - /* fcsr[23], fcsr[25-31] are vaild condition bits */ - fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); - - if ((fcsr & (1 << cc)) != 0) - target = pc + offset; - else - target = pc + 8; - - Context context; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BC1FL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t cc, fcsr; - int64_t target, pc, offset; - /* * BC1F cc, offset * condition <- (FPConditionCode(cc) == 0) @@ -2782,48 +1637,7 @@ EmulateInstructionMIPS64::Emulate_BC1FL (llvm::MCInst& insn) */ cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - fcsr = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_fcsr_mips64, 0, &success); - if (!success) - return false; - - /* fcsr[23], fcsr[25-31] are vaild condition bits */ - fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); - - if ((fcsr & (1 << cc)) == 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - Context context; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BC1TL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t cc, fcsr; - int64_t target, pc, offset; - - /* - * BC1T cc, offset - * condition <- (FPConditionCode(cc) != 0) - * if condition then - * offset = sign_ext (offset) - * PC = PC + offset - */ - cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); if (!success) return false; @@ -2835,11 +1649,23 @@ EmulateInstructionMIPS64::Emulate_BC1TL (llvm::MCInst& insn) /* fcsr[23], fcsr[25-31] are vaild condition bits */ fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); - if ((fcsr & (1 << cc)) != 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - + if (!strcasecmp (op_name, "BC1F") || + !strcasecmp (op_name, "BC1FL")) + { + if ((fcsr & (1 << cc)) == 0) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BC1T") || + !strcasecmp (op_name, "BC1TL")) + { + if ((fcsr & (1 << cc)) != 0) + target = pc + offset; + else + target = pc + 8; + } + Context context; if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) @@ -2926,109 +1752,19 @@ EmulateInstructionMIPS64::Emulate_BC1NEZ (llvm::MCInst& insn) return true; } +/* + Emulate MIPS-3D Branch instructions + BC1ANY2F, BC1ANY2T : Branch on Any of Two Floating Point Condition Codes False/True + BC1ANY4F, BC1ANY4T : 
Branch on Any of Four Floating Point Condition Codes False/True +*/ bool -EmulateInstructionMIPS64::Emulate_BC1ANY2F (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_3D_branch (llvm::MCInst& insn) { bool success = false; uint32_t cc, fcsr; - int64_t target, pc, offset; - - /* - * BC1ANY2F cc, offset - * condition <- (FPConditionCode(cc) == 0 - * || FPConditionCode(cc+1) == 0) - * if condition then - * offset = sign_ext (offset) - * PC = PC + offset - */ - cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - fcsr = (uint32_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_fcsr_mips64, 0, &success); - if (!success) - return false; - - /* fcsr[23], fcsr[25-31] are vaild condition bits */ - fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); - - /* if any one bit is 0 */ - if (((fcsr >> cc) & 3) != 3) - target = pc + offset; - else - target = pc + 8; - - Context context; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BC1ANY2T (llvm::MCInst& insn) -{ - bool success = false; - uint32_t cc, fcsr; - int64_t target, pc, offset; - - /* - * BC1ANY2T cc, offset - * condition <- (FPConditionCode(cc) == 1 - * || FPConditionCode(cc+1) == 1) - * if condition then - * offset = sign_ext (offset) - * PC = PC + offset - */ - cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - fcsr = (uint32_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_fcsr_mips64, 0, &success); - if (!success) - return false; - - /* fcsr[23], fcsr[25-31] are vaild condition bits */ - fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); - - /* if any one bit is 1 */ - if (((fcsr >> cc) & 3) != 0) - target = pc + offset; - else - target = pc + 8; - - Context context; + int64_t pc, offset, target = 0; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BC1ANY4F (llvm::MCInst& insn) -{ - bool success = false; - uint32_t cc, fcsr; - int64_t target, pc, offset; - - /* - * BC1ANY4F cc, offset - * condition <- (FPConditionCode(cc) == 0 - * || FPConditionCode(cc+1) == 0) - * || FPConditionCode(cc+2) == 0) - * || FPConditionCode(cc+3) == 0) - * if condition then - * offset = sign_ext (offset) - * PC = PC + offset - */ cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); offset = insn.getOperand(1).getImm(); @@ -3043,57 +1779,39 @@ EmulateInstructionMIPS64::Emulate_BC1ANY4F (llvm::MCInst& insn) /* fcsr[23], fcsr[25-31] are vaild condition bits */ fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); - /* if any one bit is 0 */ - if (((fcsr >> cc) & 0xf) != 0xf) - target = pc + offset; - else - target = pc + 8; - - Context context; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BC1ANY4T (llvm::MCInst& insn) -{ - bool success = false; - uint32_t cc, fcsr; - int64_t target, pc, offset; - - /* - * BC1ANY4T cc, offset - * condition <- (FPConditionCode(cc) == 1 - * || 
FPConditionCode(cc+1) == 1) - * || FPConditionCode(cc+2) == 1) - * || FPConditionCode(cc+3) == 1) - * if condition then - * offset = sign_ext (offset) - * PC = PC + offset - */ - cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - fcsr = (uint32_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_fcsr_mips64, 0, &success); - if (!success) - return false; - - /* fcsr[23], fcsr[25-31] are vaild condition bits */ - fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); + if (!strcasecmp (op_name, "BC1ANY2F")) + { + /* if any one bit is 0 */ + if (((fcsr >> cc) & 3) != 3) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BC1ANY2T")) + { + /* if any one bit is 1 */ + if (((fcsr >> cc) & 3) != 0) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BC1ANY4F")) + { + /* if any one bit is 0 */ + if (((fcsr >> cc) & 0xf) != 0xf) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BC1ANY4T")) + { + /* if any one bit is 1 */ + if (((fcsr >> cc) & 0xf) != 0) + target = pc + offset; + else + target = pc + 8; + } - /* if any one bit is 1 */ - if (((fcsr >> cc) & 0xf) != 0) - target = pc + offset; - else - target = pc + 8; - Context context; if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) diff --git a/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h b/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h index e0b2079..4ca274c 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h @@ -133,121 +133,37 @@ protected: Emulate_LDST_Reg (llvm::MCInst& insn); bool - Emulate_BEQ (llvm::MCInst& insn); + Emulate_BXX_3ops (llvm::MCInst& insn); bool - Emulate_BNE (llvm::MCInst& insn); + Emulate_BXX_3ops_C (llvm::MCInst& insn); bool - Emulate_BEQL (llvm::MCInst& insn); + Emulate_BXX_2ops (llvm::MCInst& insn); bool - Emulate_BNEL (llvm::MCInst& insn); + Emulate_BXX_2ops_C (llvm::MCInst& insn); bool - Emulate_BGEZALL (llvm::MCInst& insn); + Emulate_Bcond_Link_C (llvm::MCInst& insn); bool - Emulate_BAL (llvm::MCInst& insn); - - bool - Emulate_BGEZAL (llvm::MCInst& insn); - - bool - Emulate_BALC (llvm::MCInst& insn); - - bool - Emulate_BC (llvm::MCInst& insn); - - bool - Emulate_BGEZ (llvm::MCInst& insn); - - bool - Emulate_BLEZALC (llvm::MCInst& insn); - - bool - Emulate_BGEZALC (llvm::MCInst& insn); - - bool - Emulate_BLTZALC (llvm::MCInst& insn); - - bool - Emulate_BGTZALC (llvm::MCInst& insn); - - bool - Emulate_BEQZALC (llvm::MCInst& insn); - - bool - Emulate_BNEZALC (llvm::MCInst& insn); - - bool - Emulate_BEQC (llvm::MCInst& insn); - - bool - Emulate_BNEC (llvm::MCInst& insn); - - bool - Emulate_BLTC (llvm::MCInst& insn); - - bool - Emulate_BGEC (llvm::MCInst& insn); - - bool - Emulate_BLTUC (llvm::MCInst& insn); - - bool - Emulate_BGEUC (llvm::MCInst& insn); - - bool - Emulate_BLTZC (llvm::MCInst& insn); - - bool - Emulate_BLEZC (llvm::MCInst& insn); + Emulate_Bcond_Link (llvm::MCInst& insn); bool - Emulate_BGEZC (llvm::MCInst& insn); + Emulate_FP_branch (llvm::MCInst& insn); bool - Emulate_BGTZC (llvm::MCInst& insn); + Emulate_3D_branch (llvm::MCInst& insn); bool - Emulate_BEQZC (llvm::MCInst& insn); - - 
bool - Emulate_BNEZC (llvm::MCInst& insn); - - bool - Emulate_BGEZL (llvm::MCInst& insn); - - bool - Emulate_BGTZ (llvm::MCInst& insn); - - bool - Emulate_BGTZL (llvm::MCInst& insn); - - bool - Emulate_BLEZ (llvm::MCInst& insn); - - bool - Emulate_BLEZL (llvm::MCInst& insn); - - bool - Emulate_BLTZ (llvm::MCInst& insn); - - bool - Emulate_BLTZAL (llvm::MCInst& insn); - - bool - Emulate_BLTZALL (llvm::MCInst& insn); - - bool - Emulate_BLTZL (llvm::MCInst& insn); + Emulate_BAL (llvm::MCInst& insn); bool - Emulate_BOVC (llvm::MCInst& insn); + Emulate_BALC (llvm::MCInst& insn); bool - Emulate_BNVC (llvm::MCInst& insn); + Emulate_BC (llvm::MCInst& insn); bool Emulate_J (llvm::MCInst& insn); @@ -268,36 +184,12 @@ protected: Emulate_JR (llvm::MCInst& insn); bool - Emulate_BC1F (llvm::MCInst& insn); - - bool - Emulate_BC1T (llvm::MCInst& insn); - - bool - Emulate_BC1FL (llvm::MCInst& insn); - - bool - Emulate_BC1TL (llvm::MCInst& insn); - - bool Emulate_BC1EQZ (llvm::MCInst& insn); bool Emulate_BC1NEZ (llvm::MCInst& insn); bool - Emulate_BC1ANY2F (llvm::MCInst& insn); - - bool - Emulate_BC1ANY2T (llvm::MCInst& insn); - - bool - Emulate_BC1ANY4F (llvm::MCInst& insn); - - bool - Emulate_BC1ANY4T (llvm::MCInst& insn); - - bool Emulate_BNZB (llvm::MCInst& insn); bool diff --git a/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index a554aa5..09031e2 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -296,6 +296,22 @@ CPlusPlusLanguage::MethodName::GetQualifiers () return m_qualifiers; } +std::string +CPlusPlusLanguage::MethodName::GetScopeQualifiedName () +{ + if (!m_parsed) + Parse(); + if (m_basename.empty() || m_context.empty()) + return std::string(); + + std::string res; + res += m_context; + res += "::"; + res += m_basename; + + return res; +} + bool CPlusPlusLanguage::IsCPPMangledName (const char *name) { diff --git a/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h b/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h index 1a8c0f6..f0fc07e 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h @@ -89,6 +89,9 @@ public: { return m_full; } + + std::string + GetScopeQualifiedName (); llvm::StringRef GetBasename (); diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp index a5fa004..8e2cfb5 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp @@ -319,46 +319,6 @@ ItaniumABILanguageRuntime::IsVTableName (const char *name) return false; } -static std::map<ConstString, std::vector<ConstString> >& -GetAlternateManglingPrefixes() -{ - static std::map<ConstString, std::vector<ConstString> > g_alternate_mangling_prefixes; - return g_alternate_mangling_prefixes; -} - - -size_t -ItaniumABILanguageRuntime::GetAlternateManglings(const ConstString &mangled, std::vector<ConstString> &alternates) -{ - if (!mangled) - return static_cast<size_t>(0); - - 
alternates.clear(); - const char *mangled_cstr = mangled.AsCString(); - std::map<ConstString, std::vector<ConstString> >& alternate_mangling_prefixes = GetAlternateManglingPrefixes(); - for (std::map<ConstString, std::vector<ConstString> >::iterator it = alternate_mangling_prefixes.begin(); - it != alternate_mangling_prefixes.end(); - ++it) - { - const char *prefix_cstr = it->first.AsCString(); - if (strncmp(mangled_cstr, prefix_cstr, strlen(prefix_cstr)) == 0) - { - const std::vector<ConstString> &alternate_prefixes = it->second; - for (size_t i = 0; i < alternate_prefixes.size(); ++i) - { - std::string alternate_mangling(alternate_prefixes[i].AsCString()); - alternate_mangling.append(mangled_cstr + strlen(prefix_cstr)); - - alternates.push_back(ConstString(alternate_mangling.c_str())); - } - - return alternates.size(); - } - } - - return static_cast<size_t>(0); -} - //------------------------------------------------------------------ // Static Functions //------------------------------------------------------------------ @@ -382,17 +342,6 @@ ItaniumABILanguageRuntime::Initialize() PluginManager::RegisterPlugin (GetPluginNameStatic(), "Itanium ABI for the C++ language", CreateInstance); - - // Alternate manglings for std::basic_string<...> - std::vector<ConstString> basic_string_alternates; - basic_string_alternates.push_back(ConstString("_ZNSs")); - basic_string_alternates.push_back(ConstString("_ZNKSs")); - std::map<ConstString, std::vector<ConstString> >& alternate_mangling_prefixes = GetAlternateManglingPrefixes(); - - alternate_mangling_prefixes[ConstString("_ZNSbIcSt17char_traits<char>St15allocator<char>E")] = - basic_string_alternates; - alternate_mangling_prefixes[ConstString("_ZNKSbIcSt17char_traits<char>St15allocator<char>E")] = - basic_string_alternates; } void diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h index 519a3ce..c06b986 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h @@ -80,9 +80,6 @@ namespace lldb_private { lldb::SearchFilterSP CreateExceptionSearchFilter() override; - size_t - GetAlternateManglings(const ConstString &mangled, std::vector<ConstString> &alternates) override; - //------------------------------------------------------------------ // PluginInterface protocol //------------------------------------------------------------------ diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp index cdb9525..2810b24 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp @@ -69,7 +69,20 @@ AppleObjCRuntime::GetObjectDescription (Stream &str, ValueObject &valobj) if (!valobj.ResolveValue(val.GetScalar())) return false; - ExecutionContext exe_ctx (valobj.GetExecutionContextRef()); + // Value Objects may not have a process in their ExecutionContextRef. But we need to have one + // in the ref we pass down to eventually call description. Get it from the target if it isn't + // present. 
+ ExecutionContext exe_ctx; + if (valobj.GetProcessSP()) + { + exe_ctx = ExecutionContext(valobj.GetExecutionContextRef()); + } + else + { + exe_ctx.SetContext(valobj.GetTargetSP(), true); + if (!exe_ctx.HasProcessScope()) + return false; + } return GetObjectDescription(str, val, exe_ctx.GetBestExecutionContextScope()); } diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp index 8c485d9..5844494 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp @@ -405,9 +405,18 @@ AppleObjCRuntimeV2::GetDynamicTypeAndAddress (ValueObject &in_value, Address &address, Value::ValueType &value_type) { - // The Runtime is attached to a particular process, you shouldn't pass in a value from another process. - assert (in_value.GetProcessSP().get() == m_process); + // We should never get here with a null process... assert (m_process != NULL); + + // The Runtime is attached to a particular process, you shouldn't pass in a value from another process. + // Note, however, the process might be NULL (e.g. if the value was made with SBTarget::EvaluateExpression...) + // in which case it is sufficient if the targets match: + + Process *process = in_value.GetProcessSP().get(); + if (process) + assert (process == m_process); + else + assert (in_value.GetTargetSP().get() == m_process->CalculateTarget().get()); class_type_or_name.Clear(); value_type = Value::ValueType::eValueTypeScalar; diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp index 5d82ded..8e5d31b 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp @@ -235,16 +235,28 @@ struct RenderScriptRuntime::AllocationDetails } }; - // Header for reading and writing allocation contents - // to a binary file. + // The FileHeader struct specifies the header we use for writing allocations to a binary file. + // Our format begins with the ASCII characters "RSAD", identifying the file as an allocation dump. + // Member variables dims and hdr_size are then written consecutively, immediately followed by an instance of + // the ElementHeader struct. Because Elements can contain subelements, there may be more than one instance + // of the ElementHeader struct, with the first instance being the root element and the other instances being + // the root's descendants. To identify which instances are an ElementHeader's children, each struct + // is immediately followed by a sequence of consecutive offsets to the start of its child structs. + // These offsets are 4 bytes in size, and the 0 offset signifies no more children. 
struct FileHeader { uint8_t ident[4]; // ASCII 'RSAD' identifying the file - uint16_t hdr_size; // Header size in bytes, for backwards compatability - uint16_t type; // DataType enum - uint32_t kind; // DataKind enum uint32_t dims[3]; // Dimensions - uint32_t element_size; // Size of a single element, including padding + uint16_t hdr_size; // Header size in bytes, including all element headers + }; + + struct ElementHeader + { + uint16_t type; // DataType enum + uint32_t kind; // DataKind enum + uint32_t element_size; // Size of a single element, including padding + uint16_t vector_size; // Vector width + uint32_t array_size; // Number of elements in array }; // Monotonically increasing from 1 @@ -286,7 +298,6 @@ struct RenderScriptRuntime::AllocationDetails } }; - const ConstString & RenderScriptRuntime::Element::GetFallbackStructName() { @@ -2084,37 +2095,62 @@ RenderScriptRuntime::LoadAllocation(Stream &strm, const uint32_t alloc_id, const // Cast start of buffer to FileHeader and use pointer to read metadata void* file_buffer = data_sp->GetBytes(); - const AllocationDetails::FileHeader* head = static_cast<AllocationDetails::FileHeader*>(file_buffer); + if (file_buffer == NULL || data_sp->GetByteSize() < + (sizeof(AllocationDetails::FileHeader) + sizeof(AllocationDetails::ElementHeader))) + { + strm.Printf("Error: File %s does not contain enough data for header", filename); + strm.EOL(); + return false; + } + const AllocationDetails::FileHeader* file_header = static_cast<AllocationDetails::FileHeader*>(file_buffer); - // Advance buffer past header - file_buffer = static_cast<uint8_t*>(file_buffer) + head->hdr_size; + // Check file starts with ascii characters "RSAD" + if (file_header->ident[0] != 'R' || file_header->ident[1] != 'S' || file_header->ident[2] != 'A' + || file_header->ident[3] != 'D') + { + strm.Printf("Error: File doesn't contain identifier for an RS allocation dump. 
Are you sure this is the correct file?"); + strm.EOL(); + return false; + } + + // Look at the type of the root element in the header + AllocationDetails::ElementHeader root_element_header; + memcpy(&root_element_header, static_cast<uint8_t*>(file_buffer) + sizeof(AllocationDetails::FileHeader), + sizeof(AllocationDetails::ElementHeader)); if (log) log->Printf("RenderScriptRuntime::LoadAllocation - header type %u, element size %u", - head->type, head->element_size); + root_element_header.type, root_element_header.element_size); // Check if the target allocation and file both have the same number of bytes for an Element - if (*alloc->element.datum_size.get() != head->element_size) + if (*alloc->element.datum_size.get() != root_element_header.element_size) { strm.Printf("Warning: Mismatched Element sizes - file %u bytes, allocation %u bytes", - head->element_size, *alloc->element.datum_size.get()); + root_element_header.element_size, *alloc->element.datum_size.get()); strm.EOL(); } - // Check if the target allocation and file both have the same integral type - const unsigned int type = static_cast<unsigned int>(*alloc->element.type.get()); - if (type != head->type) + // Check if the target allocation and file both have the same type + const unsigned int alloc_type = static_cast<unsigned int>(*alloc->element.type.get()); + const unsigned int file_type = root_element_header.type; + + if (file_type > Element::RS_TYPE_FONT) + { + strm.Printf("Warning: File has unknown allocation type"); + strm.EOL(); + } + else if (alloc_type != file_type) { // Enum value isn't monotonous, so doesn't always index RsDataTypeToString array - unsigned int printable_target_type_index = type; - unsigned int printable_head_type_index = head->type; - if (type >= Element::RS_TYPE_ELEMENT && type <= Element::RS_TYPE_FONT) + unsigned int printable_target_type_index = alloc_type; + unsigned int printable_head_type_index = file_type; + if (alloc_type >= Element::RS_TYPE_ELEMENT && alloc_type <= Element::RS_TYPE_FONT) printable_target_type_index = static_cast<Element::DataType>( - (type - Element::RS_TYPE_ELEMENT) + Element::RS_TYPE_MATRIX_2X2 + 1); + (alloc_type - Element::RS_TYPE_ELEMENT) + Element::RS_TYPE_MATRIX_2X2 + 1); - if (head->type >= Element::RS_TYPE_ELEMENT && head->type <= Element::RS_TYPE_FONT) + if (file_type >= Element::RS_TYPE_ELEMENT && file_type <= Element::RS_TYPE_FONT) printable_head_type_index = static_cast<Element::DataType>( - (head->type - Element::RS_TYPE_ELEMENT) + Element::RS_TYPE_MATRIX_2X2 + 1); + (file_type - Element::RS_TYPE_ELEMENT) + Element::RS_TYPE_MATRIX_2X2 + 1); const char* file_type_cstr = AllocationDetails::RsDataTypeToString[printable_head_type_index][0]; const char* target_type_cstr = AllocationDetails::RsDataTypeToString[printable_target_type_index][0]; @@ -2124,8 +2160,11 @@ RenderScriptRuntime::LoadAllocation(Stream &strm, const uint32_t alloc_id, const strm.EOL(); } + // Advance buffer past header + file_buffer = static_cast<uint8_t*>(file_buffer) + file_header->hdr_size; + // Calculate size of allocation data in file - size_t length = data_sp->GetByteSize() - head->hdr_size; + size_t length = data_sp->GetByteSize() - file_header->hdr_size; // Check if the target allocation and file both have the same total data size. 
const unsigned int alloc_size = *alloc->size.get(); @@ -2154,6 +2193,62 @@ RenderScriptRuntime::LoadAllocation(Stream &strm, const uint32_t alloc_id, const return true; } +// Function takes as parameters a byte buffer, which will eventually be written to file as the element header, +// an offset into that buffer, and an Element that will be saved into the buffer at the parametrised offset. +// Return value is the new offset after writing the element into the buffer. +// Elements are saved to the file as the ElementHeader struct followed by offsets to the structs of all the element's children. +size_t +RenderScriptRuntime::PopulateElementHeaders(const std::shared_ptr<uint8_t> header_buffer, size_t offset, const Element& elem) +{ + // File struct for an element header with all the relevant details copied from elem. + // We assume members are valid already. + AllocationDetails::ElementHeader elem_header; + elem_header.type = *elem.type.get(); + elem_header.kind = *elem.type_kind.get(); + elem_header.element_size = *elem.datum_size.get(); + elem_header.vector_size = *elem.type_vec_size.get(); + elem_header.array_size = elem.array_size.isValid() ? *elem.array_size.get() : 0; + const size_t elem_header_size = sizeof(AllocationDetails::ElementHeader); + + // Copy struct into buffer and advance offset + // We assume that header_buffer has been checked for NULL before this method is called + memcpy(header_buffer.get() + offset, &elem_header, elem_header_size); + offset += elem_header_size; + + // Starting offset of child ElementHeader struct + size_t child_offset = offset + ((elem.children.size() + 1) * sizeof(uint32_t)); + for (const RenderScriptRuntime::Element& child : elem.children) + { + // Recursively populate the buffer with the element header structs of children. + // Then save the offsets where they were set after the parent element header. + memcpy(header_buffer.get() + offset, &child_offset, sizeof(uint32_t)); + offset += sizeof(uint32_t); + + child_offset = PopulateElementHeaders(header_buffer, child_offset, child); + } + + // Zero indicates no more children + memset(header_buffer.get() + offset, 0, sizeof(uint32_t)); + + return child_offset; +} + +// Given an Element object this function returns the total size needed in the file header to store the element's details. +// Taking into account the size of the element header struct, plus the offsets to all the element's children. +// Function is recursive so that the size of all descendants is taken into account. +size_t +RenderScriptRuntime::CalculateElementHeaderSize(const Element& elem) +{ + size_t size = (elem.children.size() + 1) * sizeof(uint32_t); // Offsets to children plus zero terminator + size += sizeof(AllocationDetails::ElementHeader); // Size of header struct with type details + + // Calculate recursively for all descendants + for (const Element& child : elem.children) + size += CalculateElementHeaderSize(child); + + return size; +} + // Function copies allocation contents into a binary file. // This file can then be loaded later into a different allocation. // There is a header, FileHeader, before the allocation data containing meta-data. 
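[Editor's note] The two helpers above define a simple recursive layout: each element's header is written as an ElementHeader struct immediately followed by 4-byte offsets to its children's headers, with a zero offset terminating the list, and the size calculation walks the same subtree so hdr_size always matches what gets written. The following standalone sketch is not part of the patch; ElemHeader, Elem, HeaderSize and Populate are simplified stand-ins for AllocationDetails::ElementHeader, Element, CalculateElementHeaderSize and PopulateElementHeaders, shown here only to make the two traversals easy to follow in lockstep:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Simplified stand-ins for AllocationDetails::ElementHeader and Element.
struct ElemHeader { uint16_t type; uint32_t kind; uint32_t element_size; uint16_t vector_size; uint32_t array_size; };
struct Elem { ElemHeader hdr; std::vector<Elem> children; };

// Mirrors CalculateElementHeaderSize: one header per node, plus one 4-byte
// offset slot per child and a 4-byte zero terminator, summed over the subtree.
size_t HeaderSize(const Elem &e)
{
    size_t size = sizeof(ElemHeader) + (e.children.size() + 1) * sizeof(uint32_t);
    for (const Elem &c : e.children)
        size += HeaderSize(c);
    return size;
}

// Mirrors PopulateElementHeaders: write this node's header, then the offsets of
// its children's headers, then recurse into each child; returns the next free
// offset so the caller knows where the following subtree begins.
size_t Populate(uint8_t *buf, size_t offset, const Elem &e)
{
    memcpy(buf + offset, &e.hdr, sizeof(ElemHeader));
    offset += sizeof(ElemHeader);

    // The first child's header starts right after this node's offset list.
    size_t child_offset = offset + (e.children.size() + 1) * sizeof(uint32_t);
    for (const Elem &c : e.children)
    {
        uint32_t off = static_cast<uint32_t>(child_offset);
        memcpy(buf + offset, &off, sizeof(uint32_t));
        offset += sizeof(uint32_t);
        child_offset = Populate(buf, child_offset, c);
    }
    memset(buf + offset, 0, sizeof(uint32_t)); // zero offset: no more children
    return child_offset;
}

For a root element with two scalar children this yields three ElementHeader records and five offset slots (the root's two child offsets plus one zero terminator per node), which is exactly the byte count HeaderSize reports.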
@@ -2209,17 +2304,44 @@ RenderScriptRuntime::SaveAllocation(Stream &strm, const uint32_t alloc_id, const // Create the file header AllocationDetails::FileHeader head; head.ident[0] = 'R'; head.ident[1] = 'S'; head.ident[2] = 'A'; head.ident[3] = 'D'; - head.hdr_size = static_cast<uint16_t>(sizeof(AllocationDetails::FileHeader)); - head.type = static_cast<uint16_t>(*alloc->element.type.get()); - head.kind = static_cast<uint32_t>(*alloc->element.type_kind.get()); head.dims[0] = static_cast<uint32_t>(alloc->dimension.get()->dim_1); head.dims[1] = static_cast<uint32_t>(alloc->dimension.get()->dim_2); head.dims[2] = static_cast<uint32_t>(alloc->dimension.get()->dim_3); - head.element_size = static_cast<uint32_t>(*alloc->element.datum_size.get()); + + const size_t element_header_size = CalculateElementHeaderSize(alloc->element); + assert((sizeof(AllocationDetails::FileHeader) + element_header_size) < UINT16_MAX && "Element header too large"); + head.hdr_size = static_cast<uint16_t>(sizeof(AllocationDetails::FileHeader) + element_header_size); // Write the file header size_t num_bytes = sizeof(AllocationDetails::FileHeader); - Error err = file.Write(static_cast<const void*>(&head), num_bytes); + if (log) + log->Printf("RenderScriptRuntime::SaveAllocation - Writing File Header, 0x%zX bytes", num_bytes); + + Error err = file.Write(&head, num_bytes); + if (!err.Success()) + { + strm.Printf("Error: '%s' when writing to file '%s'", err.AsCString(), filename); + strm.EOL(); + return false; + } + + // Create the headers describing the element type of the allocation. + std::shared_ptr<uint8_t> element_header_buffer(new uint8_t[element_header_size]); + if (element_header_buffer == nullptr) + { + strm.Printf("Internal Error: Couldn't allocate %zu bytes on the heap", element_header_size); + strm.EOL(); + return false; + } + + PopulateElementHeaders(element_header_buffer, 0, alloc->element); + + // Write headers for allocation element type to file + num_bytes = element_header_size; + if (log) + log->Printf("RenderScriptRuntime::SaveAllocation - Writing Element Headers, 0x%zX bytes", num_bytes); + + err = file.Write(element_header_buffer.get(), num_bytes); if (!err.Success()) { strm.Printf("Error: '%s' when writing to file '%s'", err.AsCString(), filename); @@ -2230,7 +2352,7 @@ RenderScriptRuntime::SaveAllocation(Stream &strm, const uint32_t alloc_id, const // Write allocation data to file num_bytes = static_cast<size_t>(*alloc->size.get()); if (log) - log->Printf("RenderScriptRuntime::SaveAllocation - Writing 0x%" PRIx64 " bytes from %p", (uint64_t) num_bytes, (void*) buffer.get()); + log->Printf("RenderScriptRuntime::SaveAllocation - Writing 0x%zX bytes", num_bytes); err = file.Write(buffer.get(), num_bytes); if (!err.Success()) diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h index 0ca268c..2fe4390 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h @@ -207,10 +207,6 @@ public: void Status(Stream &strm) const; - size_t GetAlternateManglings(const ConstString &mangled, std::vector<ConstString> &alternates) override { - return static_cast<size_t>(0); - } - void ModulesDidLoad(const ModuleList &module_list) override; bool 
LoadAllocation(Stream &strm, const uint32_t alloc_id, const char* filename, StackFrame* frame_ptr); @@ -335,6 +331,9 @@ private: static bool GetFrameVarAsUnsigned(const lldb::StackFrameSP, const char* var_name, uint64_t& val); void FindStructTypeName(Element& elem, StackFrame* frame_ptr); + size_t PopulateElementHeaders(const std::shared_ptr<uint8_t> header_buffer, size_t offset, const Element& elem); + size_t CalculateElementHeaderSize(const Element& elem); + // // Helper functions for jitting the runtime // diff --git a/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index d90b253..4777f09 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -330,15 +330,15 @@ mipsVariantFromElfFlags(const elf::elf_word e_flags, uint32_t endian) { case llvm::ELF::EF_MIPS_ARCH_1: case llvm::ELF::EF_MIPS_ARCH_2: - case llvm::ELF::EF_MIPS_ARCH_3: - case llvm::ELF::EF_MIPS_ARCH_4: - case llvm::ELF::EF_MIPS_ARCH_5: case llvm::ELF::EF_MIPS_ARCH_32: return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips32el : ArchSpec::eMIPSSubType_mips32; case llvm::ELF::EF_MIPS_ARCH_32R2: return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips32r2el : ArchSpec::eMIPSSubType_mips32r2; case llvm::ELF::EF_MIPS_ARCH_32R6: return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips32r6el : ArchSpec::eMIPSSubType_mips32r6; + case llvm::ELF::EF_MIPS_ARCH_3: + case llvm::ELF::EF_MIPS_ARCH_4: + case llvm::ELF::EF_MIPS_ARCH_5: case llvm::ELF::EF_MIPS_ARCH_64: return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips64el : ArchSpec::eMIPSSubType_mips64; case llvm::ELF::EF_MIPS_ARCH_64R2: @@ -954,9 +954,6 @@ ObjectFileELF::GetAddressByteSize() const return m_data.GetAddressByteSize(); } -// Top 16 bits of the `Symbol` flags are available. -#define ARM_ELF_SYM_IS_THUMB (1 << 16) - AddressClass ObjectFileELF::GetAddressClass (addr_t file_addr) { @@ -2195,7 +2192,6 @@ ObjectFileELF::ParseSymbols (Symtab *symtab, // symbol.st_value to produce the final symbol_value // that we store in the symtab. 
symbol_value_offset = -1; - additional_flags = ARM_ELF_SYM_IS_THUMB; m_address_class_map[symbol.st_value^1] = eAddressClassCodeAlternateISA; } else diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp index bd3978c..ebeba8c 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp @@ -72,6 +72,7 @@ lldb_private::InferiorCallMmap (Process *process, options.SetTryAllThreads(true); options.SetDebug (false); options.SetTimeoutUsec(500000); + options.SetTrapExceptions(false); addr_t prot_arg, flags_arg = 0; if (prot == eMmapProtNone) @@ -172,6 +173,7 @@ lldb_private::InferiorCallMunmap (Process *process, options.SetTryAllThreads(true); options.SetDebug (false); options.SetTimeoutUsec(500000); + options.SetTrapExceptions(false); AddressRange munmap_range; if (sc.GetAddressRange(range_scope, 0, use_inline_block_range, munmap_range)) @@ -214,7 +216,8 @@ lldb_private::InferiorCallMunmap (Process *process, bool lldb_private::InferiorCall (Process *process, const Address *address, - addr_t &returned_func) + addr_t &returned_func, + bool trap_exceptions) { Thread *thread = process->GetThreadList().GetSelectedThread().get(); if (thread == NULL || address == NULL) @@ -227,6 +230,7 @@ lldb_private::InferiorCall (Process *process, options.SetTryAllThreads(true); options.SetDebug (false); options.SetTimeoutUsec(500000); + options.SetTrapExceptions(trap_exceptions); ClangASTContext *clang_ast_context = process->GetTarget().GetScratchClangASTContext(); CompilerType clang_void_ptr_type = clang_ast_context->GetBasicType(eBasicTypeVoid).GetPointerType(); diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.h b/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.h index e56e95c..d10e849 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.h @@ -31,7 +31,8 @@ bool InferiorCallMmap(Process *proc, lldb::addr_t &allocated_addr, bool InferiorCallMunmap(Process *proc, lldb::addr_t addr, lldb::addr_t length); -bool InferiorCall(Process *proc, const Address *address, lldb::addr_t &returned_func); +bool InferiorCall(Process *proc, const Address *address, lldb::addr_t &returned_func, + bool trap_exceptions = false); } // namespace lldb_private diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp index 278a1d5..efda0ed 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp @@ -265,9 +265,32 @@ RegisterContextLLDB::InitializeZerothFrame() if (!ReadCFAValueForRow (row_register_kind, active_row, m_cfa)) { - UnwindLogMsg ("could not read CFA register for this frame."); - m_frame_type = eNotAValidFrame; - return; + // Try the fallback unwind plan since the + // full unwind plan failed.
+ FuncUnwindersSP func_unwinders_sp; + UnwindPlanSP call_site_unwind_plan; + bool cfa_status = false; + + if (m_sym_ctx_valid) + { + func_unwinders_sp = pc_module_sp->GetObjectFile()->GetUnwindTable().GetFuncUnwindersContainingAddress (m_current_pc, m_sym_ctx); + } + + if(func_unwinders_sp.get() != nullptr) + call_site_unwind_plan = func_unwinders_sp->GetUnwindPlanAtCallSite(process->GetTarget(), m_current_offset_backed_up_one); + + if (call_site_unwind_plan.get() != nullptr) + { + m_fallback_unwind_plan_sp = call_site_unwind_plan; + if(TryFallbackUnwindPlan()) + cfa_status = true; + } + if (!cfa_status) + { + UnwindLogMsg ("could not read CFA value for first frame."); + m_frame_type = eNotAValidFrame; + return; + } } UnwindLogMsg ("initialized frame current pc is 0x%" PRIx64 " cfa is 0x%" PRIx64 " using %s UnwindPlan", diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 5c7f6ca..c0ea9cc 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -101,6 +101,8 @@ GDBRemoteCommunicationClient::GDBRemoteCommunicationClient() : m_supports_QEnvironment (true), m_supports_QEnvironmentHexEncoded (true), m_supports_qSymbol (true), + m_qSymbol_requests_done (false), + m_supports_qModuleInfo (true), m_supports_jThreadsInfo (true), m_curr_pid (LLDB_INVALID_PROCESS_ID), m_curr_tid (LLDB_INVALID_THREAD_ID), @@ -376,6 +378,8 @@ GDBRemoteCommunicationClient::ResetDiscoverableSettings (bool did_exec) m_supports_QEnvironment = true; m_supports_QEnvironmentHexEncoded = true; m_supports_qSymbol = true; + m_qSymbol_requests_done = false; + m_supports_qModuleInfo = true; m_host_arch.Clear(); m_os_version_major = UINT32_MAX; m_os_version_minor = UINT32_MAX; @@ -4284,6 +4288,9 @@ GDBRemoteCommunicationClient::GetModuleInfo (const FileSpec& module_file_spec, const lldb_private::ArchSpec& arch_spec, ModuleSpec &module_spec) { + if (!m_supports_qModuleInfo) + return false; + std::string module_path = module_file_spec.GetPath (false); if (module_path.empty ()) return false; @@ -4299,8 +4306,14 @@ GDBRemoteCommunicationClient::GetModuleInfo (const FileSpec& module_file_spec, if (SendPacketAndWaitForResponse (packet.GetData(), packet.GetSize(), response, false) != PacketResult::Success) return false; - if (response.IsErrorResponse () || response.IsUnsupportedResponse ()) + if (response.IsErrorResponse ()) + return false; + + if (response.IsUnsupportedResponse ()) + { + m_supports_qModuleInfo = false; return false; + } std::string name; std::string value; @@ -4432,11 +4445,42 @@ GDBRemoteCommunicationClient::ReadExtFeature (const lldb_private::ConstString ob // qSymbol:<sym_name> The target requests the value of symbol sym_name (hex encoded). // LLDB may provide the value by sending another qSymbol packet // in the form of "qSymbol:<sym_value>:<sym_name>". +// +// Three examples: +// +// lldb sends: qSymbol:: +// lldb receives: OK +// Remote gdb stub does not need to know the addresses of any symbols, lldb does not +// need to ask again in this session. +// +// lldb sends: qSymbol:: +// lldb receives: qSymbol:64697370617463685f71756575655f6f666673657473 +// lldb sends: qSymbol::64697370617463685f71756575655f6f666673657473 +// lldb receives: OK +// Remote gdb stub asks for address of 'dispatch_queue_offsets'.
lldb does not know +// the address at this time. lldb needs to send qSymbol:: again when it has more +// solibs loaded. +// +// lldb sends: qSymbol:: +// lldb receives: qSymbol:64697370617463685f71756575655f6f666673657473 +// lldb sends: qSymbol:2bc97554:64697370617463685f71756575655f6f666673657473 +// lldb receives: OK +// Remote gdb stub asks for address of 'dispatch_queue_offsets'. lldb says that it +// is at address 0x2bc97554. Remote gdb stub sends 'OK' indicating that it does not +// need any more symbols. lldb does not need to ask again in this session. void GDBRemoteCommunicationClient::ServeSymbolLookups(lldb_private::Process *process) { - if (m_supports_qSymbol) + // Set to true once we've resolved a symbol to an address for the remote stub. + // If we get an 'OK' response after this, the remote stub doesn't need any more + // symbols and we can stop asking. + bool symbol_response_provided = false; + + // Is this the initial qSymbol:: packet? + bool first_qsymbol_query = true; + + if (m_supports_qSymbol && m_qSymbol_requests_done == false) { Mutex::Locker locker; if (GetSequenceMutex(locker, "GDBRemoteCommunicationClient::ServeSymbolLookups() failed due to not getting the sequence mutex")) @@ -4448,9 +4492,15 @@ GDBRemoteCommunicationClient::ServeSymbolLookups(lldb_private::Process *process) { if (response.IsOKResponse()) { + if (symbol_response_provided || first_qsymbol_query) + { + m_qSymbol_requests_done = true; + } + // We are done serving symbols requests return; } + first_qsymbol_query = false; if (response.IsUnsupportedResponse()) { @@ -4530,7 +4580,14 @@ GDBRemoteCommunicationClient::ServeSymbolLookups(lldb_private::Process *process) packet.Clear(); packet.PutCString("qSymbol:"); if (symbol_load_addr != LLDB_INVALID_ADDRESS) + { packet.Printf("%" PRIx64, symbol_load_addr); + symbol_response_provided = true; + } + else + { + symbol_response_provided = false; + } packet.PutCString(":"); packet.PutBytesAsRawHex8(symbol_name.data(), symbol_name.size()); continue; // go back to the while loop and send "packet" and wait for another response diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h index d2df214..311b0f3 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h @@ -619,6 +619,8 @@ protected: m_supports_QEnvironment:1, m_supports_QEnvironmentHexEncoded:1, m_supports_qSymbol:1, + m_qSymbol_requests_done:1, + m_supports_qModuleInfo:1, m_supports_jThreadsInfo:1; lldb::pid_t m_curr_pid; diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index bb528eb..6bee4e1 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -172,118 +172,6 @@ namespace { } // anonymous namespace end -class ProcessGDBRemote::GDBLoadedModuleInfoList -{ -public: - - class LoadedModuleInfo - { - public: - - enum e_data_point - { - e_has_name = 0, - e_has_base , - e_has_dynamic , - e_has_link_map , - e_num - }; - - LoadedModuleInfo () - { - for (uint32_t i = 0; i < e_num; ++i) - m_has[i] = false; - } - - void set_name (const std::string & name) - { - m_name = name; - m_has[e_has_name] = 
true; - } - bool get_name (std::string & out) const - { - out = m_name; - return m_has[e_has_name]; - } - - void set_base (const lldb::addr_t base) - { - m_base = base; - m_has[e_has_base] = true; - } - bool get_base (lldb::addr_t & out) const - { - out = m_base; - return m_has[e_has_base]; - } - - void set_base_is_offset (bool is_offset) - { - m_base_is_offset = is_offset; - } - bool get_base_is_offset(bool & out) const - { - out = m_base_is_offset; - return m_has[e_has_base]; - } - - void set_link_map (const lldb::addr_t addr) - { - m_link_map = addr; - m_has[e_has_link_map] = true; - } - bool get_link_map (lldb::addr_t & out) const - { - out = m_link_map; - return m_has[e_has_link_map]; - } - - void set_dynamic (const lldb::addr_t addr) - { - m_dynamic = addr; - m_has[e_has_dynamic] = true; - } - bool get_dynamic (lldb::addr_t & out) const - { - out = m_dynamic; - return m_has[e_has_dynamic]; - } - - bool has_info (e_data_point datum) - { - assert (datum < e_num); - return m_has[datum]; - } - - protected: - - bool m_has[e_num]; - std::string m_name; - lldb::addr_t m_link_map; - lldb::addr_t m_base; - bool m_base_is_offset; - lldb::addr_t m_dynamic; - }; - - GDBLoadedModuleInfoList () - : m_list () - , m_link_map (LLDB_INVALID_ADDRESS) - {} - - void add (const LoadedModuleInfo & mod) - { - m_list.push_back (mod); - } - - void clear () - { - m_list.clear (); - } - - std::vector<LoadedModuleInfo> m_list; - lldb::addr_t m_link_map; -}; - // TODO Randomly assigning a port is unsafe. We should get an unused // ephemeral port from the kernel and make sure we reserve it before passing // it to debugserver. @@ -2033,6 +1921,8 @@ ProcessGDBRemote::SetThreadStopInfo (lldb::tid_t tid, const std::vector<addr_t> &exc_data, addr_t thread_dispatch_qaddr, bool queue_vars_valid, // Set to true if queue_name, queue_kind and queue_serial are valid + LazyBool associated_with_dispatch_queue, + addr_t dispatch_queue_t, std::string &queue_name, QueueKind queue_kind, uint64_t queue_serial) @@ -2073,10 +1963,15 @@ ProcessGDBRemote::SetThreadStopInfo (lldb::tid_t tid, gdb_thread->SetThreadDispatchQAddr (thread_dispatch_qaddr); // Check if the GDB server was able to provide the queue name, kind and serial number if (queue_vars_valid) - gdb_thread->SetQueueInfo(std::move(queue_name), queue_kind, queue_serial); + gdb_thread->SetQueueInfo(std::move(queue_name), queue_kind, queue_serial, dispatch_queue_t, associated_with_dispatch_queue); else gdb_thread->ClearQueueInfo(); + gdb_thread->SetAssociatedWithLibdispatchQueue (associated_with_dispatch_queue); + + if (dispatch_queue_t != LLDB_INVALID_ADDRESS) + gdb_thread->SetQueueLibdispatchQueueAddress (dispatch_queue_t); + // Make sure we update our thread stop reason just once if (!thread_sp->StopInfoIsUpToDate()) { @@ -2247,9 +2142,11 @@ ProcessGDBRemote::SetThreadStopInfo (StructuredData::Dictionary *thread_dict) static ConstString g_key_metype("metype"); static ConstString g_key_medata("medata"); static ConstString g_key_qaddr("qaddr"); + static ConstString g_key_dispatch_queue_t("dispatch_queue_t"); + static ConstString g_key_associated_with_dispatch_queue("associated_with_dispatch_queue"); static ConstString g_key_queue_name("qname"); static ConstString g_key_queue_kind("qkind"); - static ConstString g_key_queue_serial("qserial"); + static ConstString g_key_queue_serial_number("qserialnum"); static ConstString g_key_registers("registers"); static ConstString g_key_memory("memory"); static ConstString g_key_address("address"); @@ -2269,9 +2166,11 @@ 
ProcessGDBRemote::SetThreadStopInfo (StructuredData::Dictionary *thread_dict) addr_t thread_dispatch_qaddr = LLDB_INVALID_ADDRESS; ExpeditedRegisterMap expedited_register_map; bool queue_vars_valid = false; + addr_t dispatch_queue_t = LLDB_INVALID_ADDRESS; + LazyBool associated_with_dispatch_queue = eLazyBoolCalculate; std::string queue_name; QueueKind queue_kind = eQueueKindUnknown; - uint64_t queue_serial = 0; + uint64_t queue_serial_number = 0; // Iterate through all of the thread dictionary key/value pairs from the structured data dictionary thread_dict->ForEach([this, @@ -2285,9 +2184,11 @@ ProcessGDBRemote::SetThreadStopInfo (StructuredData::Dictionary *thread_dict) &exc_data, &thread_dispatch_qaddr, &queue_vars_valid, + &associated_with_dispatch_queue, + &dispatch_queue_t, &queue_name, &queue_kind, - &queue_serial] + &queue_serial_number] (ConstString key, StructuredData::Object* object) -> bool { if (key == g_key_tid) @@ -2339,12 +2240,27 @@ ProcessGDBRemote::SetThreadStopInfo (StructuredData::Dictionary *thread_dict) queue_kind = eQueueKindConcurrent; } } - else if (key == g_key_queue_serial) + else if (key == g_key_queue_serial_number) { - queue_serial = object->GetIntegerValue(0); - if (queue_serial != 0) + queue_serial_number = object->GetIntegerValue(0); + if (queue_serial_number != 0) queue_vars_valid = true; } + else if (key == g_key_dispatch_queue_t) + { + dispatch_queue_t = object->GetIntegerValue(0); + if (dispatch_queue_t != 0 && dispatch_queue_t != LLDB_INVALID_ADDRESS) + queue_vars_valid = true; + } + else if (key == g_key_associated_with_dispatch_queue) + { + queue_vars_valid = true; + bool associated = object->GetBooleanValue (); + if (associated) + associated_with_dispatch_queue = eLazyBoolYes; + else + associated_with_dispatch_queue = eLazyBoolNo; + } else if (key == g_key_reason) { reason = object->GetStringValue(); @@ -2415,9 +2331,11 @@ ProcessGDBRemote::SetThreadStopInfo (StructuredData::Dictionary *thread_dict) exc_data, thread_dispatch_qaddr, queue_vars_valid, + associated_with_dispatch_queue, + dispatch_queue_t, queue_name, queue_kind, - queue_serial); + queue_serial_number); } StateType @@ -2460,9 +2378,11 @@ ProcessGDBRemote::SetThreadStopInfo (StringExtractor& stop_packet) std::vector<addr_t> exc_data; addr_t thread_dispatch_qaddr = LLDB_INVALID_ADDRESS; bool queue_vars_valid = false; // says if locals below that start with "queue_" are valid + addr_t dispatch_queue_t = LLDB_INVALID_ADDRESS; + LazyBool associated_with_dispatch_queue = eLazyBoolCalculate; std::string queue_name; QueueKind queue_kind = eQueueKindUnknown; - uint64_t queue_serial = 0; + uint64_t queue_serial_number = 0; ExpeditedRegisterMap expedited_register_map; while (stop_packet.GetNameColonValue(key, value)) { @@ -2553,6 +2473,11 @@ ProcessGDBRemote::SetThreadStopInfo (StringExtractor& stop_packet) { thread_dispatch_qaddr = StringConvert::ToUInt64 (value.c_str(), 0, 16); } + else if (key.compare("dispatch_queue_t") == 0) + { + queue_vars_valid = true; + dispatch_queue_t = StringConvert::ToUInt64 (value.c_str(), 0, 16); + } else if (key.compare("qname") == 0) { queue_vars_valid = true; @@ -2576,10 +2501,10 @@ ProcessGDBRemote::SetThreadStopInfo (StringExtractor& stop_packet) queue_kind = eQueueKindConcurrent; } } - else if (key.compare("qserial") == 0) + else if (key.compare("qserialnum") == 0) { - queue_serial = StringConvert::ToUInt64 (value.c_str(), 0, 0); - if (queue_serial != 0) + queue_serial_number = StringConvert::ToUInt64 (value.c_str(), 0, 0); + if (queue_serial_number != 0) 
queue_vars_valid = true; } else if (key.compare("reason") == 0) @@ -2677,9 +2602,11 @@ ProcessGDBRemote::SetThreadStopInfo (StringExtractor& stop_packet) exc_data, thread_dispatch_qaddr, queue_vars_valid, + associated_with_dispatch_queue, + dispatch_queue_t, queue_name, queue_kind, - queue_serial); + queue_serial_number); return eStateStopped; } @@ -3052,7 +2979,7 @@ ProcessGDBRemote::GetImageInfoAddress() // the loaded module list can also provides a link map address if (addr == LLDB_INVALID_ADDRESS) { - GDBLoadedModuleInfoList list; + LoadedModuleInfoList list; if (GetLoadedModuleList (list).Success()) addr = list.m_link_map; } @@ -4704,7 +4631,7 @@ ProcessGDBRemote::GetGDBServerRegisterInfo () } Error -ProcessGDBRemote::GetLoadedModuleList (GDBLoadedModuleInfoList & list) +ProcessGDBRemote::GetLoadedModuleList (LoadedModuleInfoList & list) { // Make sure LLDB has an XML parser it can use first if (!XMLDocument::XMLEnabled()) @@ -4748,7 +4675,7 @@ ProcessGDBRemote::GetLoadedModuleList (GDBLoadedModuleInfoList & list) root_element.ForEachChildElementWithName("library", [log, &list](const XMLNode &library) -> bool { - GDBLoadedModuleInfoList::LoadedModuleInfo module; + LoadedModuleInfoList::LoadedModuleInfo module; library.ForEachAttribute([log, &module](const llvm::StringRef &name, const llvm::StringRef &value) -> bool { @@ -4818,7 +4745,7 @@ ProcessGDBRemote::GetLoadedModuleList (GDBLoadedModuleInfoList & list) return Error(); root_element.ForEachChildElementWithName("library", [log, &list](const XMLNode &library) -> bool { - GDBLoadedModuleInfoList::LoadedModuleInfo module; + LoadedModuleInfoList::LoadedModuleInfo module; llvm::StringRef name = library.GetAttributeValue("name"); module.set_name(name.str()); @@ -4880,19 +4807,18 @@ ProcessGDBRemote::LoadModuleAtAddress (const FileSpec &file, lldb::addr_t base_a } size_t -ProcessGDBRemote::LoadModules () +ProcessGDBRemote::LoadModules (LoadedModuleInfoList &module_list) { using lldb_private::process_gdb_remote::ProcessGDBRemote; // request a list of loaded libraries from GDBServer - GDBLoadedModuleInfoList module_list; if (GetLoadedModuleList (module_list).Fail()) return 0; // get a list of all the modules ModuleList new_modules; - for (GDBLoadedModuleInfoList::LoadedModuleInfo & modInfo : module_list.m_list) + for (LoadedModuleInfoList::LoadedModuleInfo & modInfo : module_list.m_list) { std::string mod_name; lldb::addr_t mod_base; @@ -4943,6 +4869,14 @@ ProcessGDBRemote::LoadModules () } return new_modules.GetSize(); + +} + +size_t +ProcessGDBRemote::LoadModules () +{ + LoadedModuleInfoList module_list; + return LoadModules (module_list); } Error diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h index 5474982..b48edd8 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h @@ -27,6 +27,7 @@ #include "lldb/Core/StringList.h" #include "lldb/Core/StructuredData.h" #include "lldb/Core/ThreadSafeValue.h" +#include "lldb/Core/LoadedModuleInfoList.h" #include "lldb/Host/HostThread.h" #include "lldb/lldb-private-forward.h" #include "lldb/Utility/StringExtractor.h" @@ -245,6 +246,9 @@ public: uint32_t &update) override; size_t + LoadModules(LoadedModuleInfoList &module_list) override; + + size_t LoadModules() override; Error @@ -261,8 +265,6 @@ protected: friend class GDBRemoteCommunicationClient; friend class 
GDBRemoteRegisterContext; - class GDBLoadedModuleInfoList; - //------------------------------------------------------------------ /// Broadcaster event bits definitions. //------------------------------------------------------------------ @@ -429,6 +431,8 @@ protected: const std::vector<lldb::addr_t> &exc_data, lldb::addr_t thread_dispatch_qaddr, bool queue_vars_valid, + lldb_private::LazyBool associated_with_libdispatch_queue, + lldb::addr_t dispatch_queue_t, std::string &queue_name, lldb::QueueKind queue_kind, uint64_t queue_serial); @@ -461,7 +465,7 @@ protected: // Query remote GDBServer for a detailed loaded library list Error - GetLoadedModuleList (GDBLoadedModuleInfoList &); + GetLoadedModuleList (LoadedModuleInfoList &); lldb::ModuleSP LoadModuleAtAddress (const FileSpec &file, lldb::addr_t base_addr, bool value_is_offset); diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp index 9b410d8..a4af12c 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp @@ -41,8 +41,10 @@ ThreadGDBRemote::ThreadGDBRemote (Process &process, lldb::tid_t tid) : m_thread_name (), m_dispatch_queue_name (), m_thread_dispatch_qaddr (LLDB_INVALID_ADDRESS), - m_queue_kind(eQueueKindUnknown), - m_queue_serial(0) + m_dispatch_queue_t (LLDB_INVALID_ADDRESS), + m_queue_kind (eQueueKindUnknown), + m_queue_serial_number (LLDB_INVALID_QUEUE_ID), + m_associated_with_libdispatch_queue (eLazyBoolCalculate) { ProcessGDBRemoteLog::LogIf(GDBR_LOG_THREAD, "%p: ThreadGDBRemote::ThreadGDBRemote (pid = %i, tid = 0x%4.4x)", this, @@ -73,15 +75,19 @@ ThreadGDBRemote::ClearQueueInfo () { m_dispatch_queue_name.clear(); m_queue_kind = eQueueKindUnknown; - m_queue_serial = 0; + m_queue_serial_number = 0; + m_dispatch_queue_t = LLDB_INVALID_ADDRESS; + m_associated_with_libdispatch_queue = eLazyBoolCalculate; } void -ThreadGDBRemote::SetQueueInfo (std::string &&queue_name, QueueKind queue_kind, uint64_t queue_serial) +ThreadGDBRemote::SetQueueInfo (std::string &&queue_name, QueueKind queue_kind, uint64_t queue_serial, addr_t dispatch_queue_t, LazyBool associated_with_libdispatch_queue) { m_dispatch_queue_name = queue_name; m_queue_kind = queue_kind; - m_queue_serial = queue_serial; + m_queue_serial_number = queue_serial; + m_dispatch_queue_t = dispatch_queue_t; + m_associated_with_libdispatch_queue = associated_with_libdispatch_queue; } @@ -100,7 +106,10 @@ ThreadGDBRemote::GetQueueName () } // Always re-fetch the dispatch queue name since it can change - if (m_thread_dispatch_qaddr != 0 || m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS) + if (m_associated_with_libdispatch_queue == eLazyBoolNo) + return nullptr; + + if (m_thread_dispatch_qaddr != 0 && m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS) { ProcessSP process_sp (GetProcess()); if (process_sp) @@ -118,6 +127,35 @@ ThreadGDBRemote::GetQueueName () return NULL; } +QueueKind +ThreadGDBRemote::GetQueueKind () +{ + // If our cached queue info is valid, then someone called ThreadGDBRemote::SetQueueInfo(...) + // with valid information that was gleaned from the stop reply packet. 
In this case we trust + // that the info is valid in m_dispatch_queue_name without refetching it + if (CachedQueueInfoIsValid()) + { + return m_queue_kind; + } + + if (m_associated_with_libdispatch_queue == eLazyBoolNo) + return eQueueKindUnknown; + + if (m_thread_dispatch_qaddr != 0 && m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS) + { + ProcessSP process_sp (GetProcess()); + if (process_sp) + { + SystemRuntime *runtime = process_sp->GetSystemRuntime (); + if (runtime) + m_queue_kind = runtime->GetQueueKind (m_thread_dispatch_qaddr); + return m_queue_kind; + } + } + return eQueueKindUnknown; +} + + queue_id_t ThreadGDBRemote::GetQueueID () { @@ -125,9 +163,12 @@ ThreadGDBRemote::GetQueueID () // with valid information that was gleaned from the stop reply packet. In this case we trust // that the info is valid in m_dispatch_queue_name without refetching it if (CachedQueueInfoIsValid()) - return m_queue_serial; + return m_queue_serial_number; + + if (m_associated_with_libdispatch_queue == eLazyBoolNo) + return LLDB_INVALID_QUEUE_ID; - if (m_thread_dispatch_qaddr != 0 || m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS) + if (m_thread_dispatch_qaddr != 0 && m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS) { ProcessSP process_sp (GetProcess()); if (process_sp) @@ -161,20 +202,54 @@ ThreadGDBRemote::GetQueue () addr_t ThreadGDBRemote::GetQueueLibdispatchQueueAddress () { - addr_t dispatch_queue_t_addr = LLDB_INVALID_ADDRESS; - if (m_thread_dispatch_qaddr != 0 || m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS) + if (m_dispatch_queue_t == LLDB_INVALID_ADDRESS) { - ProcessSP process_sp (GetProcess()); - if (process_sp) + if (m_thread_dispatch_qaddr != 0 && m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS) { - SystemRuntime *runtime = process_sp->GetSystemRuntime (); - if (runtime) + ProcessSP process_sp (GetProcess()); + if (process_sp) { - dispatch_queue_t_addr = runtime->GetLibdispatchQueueAddressFromThreadQAddress (m_thread_dispatch_qaddr); + SystemRuntime *runtime = process_sp->GetSystemRuntime (); + if (runtime) + { + m_dispatch_queue_t = runtime->GetLibdispatchQueueAddressFromThreadQAddress (m_thread_dispatch_qaddr); + } } } } - return dispatch_queue_t_addr; + return m_dispatch_queue_t; +} + +void +ThreadGDBRemote::SetQueueLibdispatchQueueAddress (lldb::addr_t dispatch_queue_t) +{ + m_dispatch_queue_t = dispatch_queue_t; +} + +bool +ThreadGDBRemote::ThreadHasQueueInformation () const +{ + if (m_thread_dispatch_qaddr != 0 + && m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS + && m_dispatch_queue_t != LLDB_INVALID_ADDRESS + && m_queue_kind != eQueueKindUnknown + && m_queue_serial_number != 0) + { + return true; + } + return false; +} + +LazyBool +ThreadGDBRemote::GetAssociatedWithLibdispatchQueue () +{ + return m_associated_with_libdispatch_queue; +} + +void +ThreadGDBRemote::SetAssociatedWithLibdispatchQueue (LazyBool associated_with_libdispatch_queue) +{ + m_associated_with_libdispatch_queue = associated_with_libdispatch_queue; } StructuredData::ObjectSP diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h index 24693ba..d7619f4 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h @@ -46,6 +46,9 @@ public: const char * GetQueueName () override; + lldb::QueueKind + GetQueueKind () override; + lldb::queue_id_t GetQueueID () override; @@ -55,6 +58,12 @@ public: 
lldb::addr_t GetQueueLibdispatchQueueAddress () override; + void + SetQueueLibdispatchQueueAddress (lldb::addr_t dispatch_queue_t) override; + + bool + ThreadHasQueueInformation () const override; + lldb::RegisterContextSP GetRegisterContext () override; @@ -98,7 +107,13 @@ public: ClearQueueInfo (); void - SetQueueInfo (std::string &&queue_name, lldb::QueueKind queue_kind, uint64_t queue_serial); + SetQueueInfo (std::string &&queue_name, lldb::QueueKind queue_kind, uint64_t queue_serial, lldb::addr_t dispatch_queue_t, lldb_private::LazyBool associated_with_libdispatch_queue); + + lldb_private::LazyBool + GetAssociatedWithLibdispatchQueue () override; + + void + SetAssociatedWithLibdispatchQueue (lldb_private::LazyBool associated_with_libdispatch_queue) override; StructuredData::ObjectSP FetchThreadExtendedInfo () override; @@ -109,8 +124,10 @@ protected: std::string m_thread_name; std::string m_dispatch_queue_name; lldb::addr_t m_thread_dispatch_qaddr; + lldb::addr_t m_dispatch_queue_t; lldb::QueueKind m_queue_kind; // Queue info from stop reply/stop info for thread - uint64_t m_queue_serial; // Queue info from stop reply/stop info for thread + uint64_t m_queue_serial_number; // Queue info from stop reply/stop info for thread + lldb_private::LazyBool m_associated_with_libdispatch_queue; bool PrivateSetRegisterValue (uint32_t reg, diff --git a/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp index 3107677..23bacc9 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp @@ -77,6 +77,10 @@ PythonObject::GetObjectType() const return PyObjectType::Dictionary; if (PythonString::Check(m_py_obj)) return PyObjectType::String; +#if PY_MAJOR_VERSION >= 3 + if (PythonBytes::Check(m_py_obj)) + return PyObjectType::Bytes; +#endif if (PythonInteger::Check(m_py_obj)) return PyObjectType::Integer; if (PythonFile::Check(m_py_obj)) @@ -210,6 +214,8 @@ PythonObject::CreateStructuredObject() const return PythonList(PyRefType::Borrowed, m_py_obj).CreateStructuredArray(); case PyObjectType::String: return PythonString(PyRefType::Borrowed, m_py_obj).CreateStructuredString(); + case PyObjectType::Bytes: + return PythonBytes(PyRefType::Borrowed, m_py_obj).CreateStructuredString(); case PyObjectType::None: return StructuredData::ObjectSP(); default: @@ -220,6 +226,104 @@ PythonObject::CreateStructuredObject() const //---------------------------------------------------------------------- // PythonString //---------------------------------------------------------------------- +PythonBytes::PythonBytes() : PythonObject() +{ +} + +PythonBytes::PythonBytes(llvm::ArrayRef<uint8_t> bytes) : PythonObject() +{ + SetBytes(bytes); +} + +PythonBytes::PythonBytes(const uint8_t *bytes, size_t length) : PythonObject() +{ + SetBytes(llvm::ArrayRef<uint8_t>(bytes, length)); +} + +PythonBytes::PythonBytes(PyRefType type, PyObject *py_obj) : PythonObject() +{ + Reset(type, py_obj); // Use "Reset()" to ensure that py_obj is a string +} + +PythonBytes::PythonBytes(const PythonBytes &object) : PythonObject(object) +{ +} + +PythonBytes::~PythonBytes() +{ +} + +bool +PythonBytes::Check(PyObject *py_obj) +{ + if (!py_obj) + return false; + if (PyBytes_Check(py_obj)) + return true; + return false; +} + +void +PythonBytes::Reset(PyRefType type, PyObject *py_obj) +{ + // Grab the desired 
reference type so that if we end up rejecting + // `py_obj` it still gets decremented if necessary. + PythonObject result(type, py_obj); + + if (!PythonBytes::Check(py_obj)) + { + PythonObject::Reset(); + return; + } + + // Calling PythonObject::Reset(const PythonObject&) will lead to stack overflow since it calls + // back into the virtual implementation. + PythonObject::Reset(PyRefType::Borrowed, result.get()); +} + +llvm::ArrayRef<uint8_t> +PythonBytes::GetBytes() const +{ + if (!IsValid()) + return llvm::ArrayRef<uint8_t>(); + + Py_ssize_t size; + char *c; + + PyBytes_AsStringAndSize(m_py_obj, &c, &size); + return llvm::ArrayRef<uint8_t>(reinterpret_cast<uint8_t *>(c), size); +} + +size_t +PythonBytes::GetSize() const +{ + if (!IsValid()) + return 0; + return PyBytes_Size(m_py_obj); +} + +void +PythonBytes::SetBytes(llvm::ArrayRef<uint8_t> bytes) +{ + const char *data = reinterpret_cast<const char *>(bytes.data()); + PyObject *py_bytes = PyBytes_FromStringAndSize(data, bytes.size()); + PythonObject::Reset(PyRefType::Owned, py_bytes); +} + +StructuredData::StringSP +PythonBytes::CreateStructuredString() const +{ + StructuredData::StringSP result(new StructuredData::String); + Py_ssize_t size; + char *c; + PyBytes_AsStringAndSize(m_py_obj, &c, &size); + result->SetValue(std::string(c, size)); + return result; +} + +//---------------------------------------------------------------------- +// PythonString +//---------------------------------------------------------------------- PythonString::PythonString(PyRefType type, PyObject *py_obj) : PythonObject() diff --git a/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h b/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h index c9d17c0..06264b6 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h +++ b/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h @@ -23,8 +23,11 @@ #include "lldb/Host/File.h" #include "lldb/Interpreter/OptionValue.h" +#include "llvm/ADT/ArrayRef.h" + namespace lldb_private { +class PythonBytes; class PythonString; class PythonList; class PythonDictionary; @@ -71,6 +74,7 @@ enum class PyObjectType Dictionary, List, String, + Bytes, Module, Callable, Tuple, @@ -256,6 +260,39 @@ protected: PyObject* m_py_obj; }; +class PythonBytes : public PythonObject +{ +public: + PythonBytes(); + explicit PythonBytes(llvm::ArrayRef<uint8_t> bytes); + PythonBytes(const uint8_t *bytes, size_t length); + PythonBytes(PyRefType type, PyObject *o); + PythonBytes(const PythonBytes &object); + + ~PythonBytes() override; + + static bool + Check(PyObject *py_obj); + + // Bring in the no-argument base class version + using PythonObject::Reset; + + void + Reset(PyRefType type, PyObject *py_obj) override; + + llvm::ArrayRef<uint8_t> + GetBytes() const; + + size_t + GetSize() const; + + void + SetBytes(llvm::ArrayRef<uint8_t> stringbytes); + + StructuredData::StringSP + CreateStructuredString() const; +}; + class PythonString : public PythonObject { public: diff --git a/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index b1dd34b..19ad86d 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -1692,10 +1692,10 @@ 
ScriptInterpreterPython::OSPlugin_RegisterContextData(StructuredData::ObjectSP o PyErr_Clear(); } - assert(PythonString::Check(py_return.get()) && "get_register_data returned unknown object type!"); + assert(PythonBytes::Check(py_return.get()) && "get_register_data returned unknown object type!"); - PythonString result_string(PyRefType::Borrowed, py_return.get()); - return result_string.CreateStructuredString(); + PythonBytes result(PyRefType::Borrowed, py_return.get()); + return result.CreateStructuredString(); } StructuredData::DictionarySP diff --git a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h index a5484db..3df07d5 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h +++ b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h @@ -33,6 +33,18 @@ struct DIERef lldb::user_id_t GetUID() const; + bool + operator< (const DIERef &ref) const + { + return die_offset < ref.die_offset; + } + + bool + operator< (const DIERef &ref) + { + return die_offset < ref.die_offset; + } + dw_offset_t cu_offset; dw_offset_t die_offset; }; diff --git a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 68a0285..74b54d6 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2928,7 +2928,7 @@ DWARFASTParserClang::ParseChildMembers (const SymbolContext& sc, if (member_byte_offset >= parent_byte_size) { - if (member_array_size != 1) + if (member_array_size != 1 && (member_array_size != 0 || member_byte_offset > parent_byte_size)) { module_sp->ReportError ("0x%8.8" PRIx64 ": DW_TAG_member '%s' refers to type 0x%8.8" PRIx64 " which extends beyond the bounds of 0x%8.8" PRIx64, die.GetID(), diff --git a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 0ed4d05..2088864 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -1065,12 +1065,17 @@ SymbolFileDWARF::ParseCompileUnitSupportFiles (const SymbolContext& sc, FileSpec const char * cu_comp_dir = resolveCompDir(cu_die.GetAttributeValueAsString(DW_AT_comp_dir, nullptr)); const dw_offset_t stmt_list = cu_die.GetAttributeValueAsUnsigned(DW_AT_stmt_list, DW_INVALID_OFFSET); - - // All file indexes in DWARF are one based and a file of index zero is - // supposed to be the compile unit itself. - support_files.Append (*sc.comp_unit); - - return DWARFDebugLine::ParseSupportFiles(sc.comp_unit->GetModule(), get_debug_line_data(), cu_comp_dir, stmt_list, support_files); + if (stmt_list != DW_INVALID_OFFSET) + { + // All file indexes in DWARF are one based and a file of index zero is + // supposed to be the compile unit itself. 
+ support_files.Append (*sc.comp_unit); + return DWARFDebugLine::ParseSupportFiles(sc.comp_unit->GetModule(), + get_debug_line_data(), + cu_comp_dir, + stmt_list, + support_files); + } } } return false; @@ -2927,6 +2932,40 @@ SymbolFileDWARF::FindFunctions(const RegularExpression& regex, bool include_inli return sc_list.GetSize() - original_size; } +void +SymbolFileDWARF::GetMangledNamesForFunction (const std::string &scope_qualified_name, + std::vector<ConstString> &mangled_names) +{ + DWARFDebugInfo* info = DebugInfo(); + uint32_t num_comp_units = 0; + if (info) + num_comp_units = info->GetNumCompileUnits(); + + for (uint32_t i = 0; i < num_comp_units; i++) + { + DWARFCompileUnit *cu = info->GetCompileUnitAtIndex(i); + if (cu == nullptr) + continue; + + SymbolFileDWARFDwo *dwo = cu->GetDwoSymbolFile(); + if (dwo) + dwo->GetMangledNamesForFunction(scope_qualified_name, mangled_names); + } + + NameToOffsetMap::iterator iter = m_function_scope_qualified_name_map.find(scope_qualified_name); + if (iter == m_function_scope_qualified_name_map.end()) + return; + + DIERefSetSP set_sp = (*iter).second; + std::set<DIERef>::iterator set_iter; + for (set_iter = set_sp->begin(); set_iter != set_sp->end(); set_iter++) + { + DWARFDIE die = DebugInfo()->GetDIE (*set_iter); + mangled_names.push_back(ConstString(die.GetMangledName())); + } +} + + uint32_t SymbolFileDWARF::FindTypes (const SymbolContext& sc, const ConstString &name, @@ -3751,6 +3790,24 @@ SymbolFileDWARF::ParseType (const SymbolContext& sc, const DWARFDIE &die, bool * TypeList* type_list = GetTypeList(); if (type_list) type_list->Insert(type_sp); + + if (die.Tag() == DW_TAG_subprogram) + { + DIERef die_ref = die.GetDIERef(); + std::string scope_qualified_name(GetDeclContextForUID(die.GetID()).GetScopeQualifiedName().AsCString("")); + if (scope_qualified_name.size()) + { + NameToOffsetMap::iterator iter = m_function_scope_qualified_name_map.find(scope_qualified_name); + if (iter != m_function_scope_qualified_name_map.end()) + (*iter).second->insert(die_ref); + else + { + DIERefSetSP new_set(new std::set<DIERef>); + new_set->insert(die_ref); + m_function_scope_qualified_name_map.emplace(std::make_pair(scope_qualified_name, new_set)); + } + } + } } } } diff --git a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index c2e78a4..be09759 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -208,6 +208,10 @@ public: bool append, lldb_private::SymbolContextList& sc_list) override; + void + GetMangledNamesForFunction (const std::string &scope_qualified_name, + std::vector<lldb_private::ConstString> &mangled_names) override; + uint32_t FindTypes (const lldb_private::SymbolContext& sc, const lldb_private::ConstString &name, @@ -577,6 +581,9 @@ protected: m_fetched_external_modules:1; lldb_private::LazyBool m_supports_DW_AT_APPLE_objc_complete_type; + typedef std::shared_ptr<std::set<DIERef> > DIERefSetSP; + typedef std::unordered_map<std::string, DIERefSetSP> NameToOffsetMap; + NameToOffsetMap m_function_scope_qualified_name_map; std::unique_ptr<DWARFDebugRanges> m_ranges; UniqueDWARFASTTypeMap m_unique_ast_type_map; DIEToTypePtr m_die_to_type; diff --git a/contrib/llvm/tools/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp b/contrib/llvm/tools/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp index 
7e4c696..76f0b48 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp @@ -155,6 +155,7 @@ private: bool mov_reg_to_local_stack_frame_p (int& regno, int& fp_offset); bool ret_pattern_p (); bool pop_rbp_pattern_p (); + bool leave_pattern_p (); bool call_next_insn_pattern_p(); uint32_t extract_4 (uint8_t *b); bool machine_regno_to_lldb_regno (int machine_regno, uint32_t& lldb_regno); @@ -492,6 +493,14 @@ AssemblyParse_x86::pop_rbp_pattern_p () return (*p == 0x5d); } +// leave [0xc9] +bool +AssemblyParse_x86::leave_pattern_p () +{ + uint8_t *p = m_cur_insn_bytes; + return (*p == 0xc9); +} + // call $0 [0xe8 0x0 0x0 0x0 0x0] bool AssemblyParse_x86::call_next_insn_pattern_p () @@ -780,8 +789,7 @@ AssemblyParse_x86::get_non_call_site_unwind_plan (UnwindPlan &unwind_plan) if (machine_regno == (int)m_machine_fp_regnum) { - row->GetCFAValue().SetIsRegisterPlusOffset (m_lldb_sp_regnum, - row->GetCFAValue().GetOffset()); + row->GetCFAValue().SetIsRegisterPlusOffset (m_lldb_sp_regnum, row->GetCFAValue().GetOffset()); } in_epilogue = true; @@ -792,12 +800,35 @@ AssemblyParse_x86::get_non_call_site_unwind_plan (UnwindPlan &unwind_plan) // we need to add a new row of instructions. if (row->GetCFAValue().GetRegisterNumber() == m_lldb_sp_regnum) { - row->GetCFAValue().SetIsRegisterPlusOffset(m_lldb_sp_regnum, - current_sp_bytes_offset_from_cfa); + row->GetCFAValue().SetIsRegisterPlusOffset (m_lldb_sp_regnum, current_sp_bytes_offset_from_cfa); row_updated = true; } } + // The LEAVE instruction moves the value from rbp into rsp and pops + // a value off the stack into rbp (restoring the caller's rbp value). + // It is the opposite of ENTER, or 'push rbp, mov rsp rbp'. + else if (leave_pattern_p ()) + { + // We're going to copy the value in rbp into rsp, so re-set the sp offset + // based on the CFAValue. Also, adjust it to recognize that we're popping + // the saved rbp value off the stack. + current_sp_bytes_offset_from_cfa = row->GetCFAValue().GetOffset(); + current_sp_bytes_offset_from_cfa -= m_wordsize; + row->GetCFAValue().SetOffset (current_sp_bytes_offset_from_cfa); + + // rbp is restored to the caller's value + saved_registers[m_machine_fp_regnum] = false; + row->RemoveRegisterInfo (m_lldb_fp_regnum); + + // cfa is now in terms of rsp again. 
+ row->GetCFAValue().SetIsRegisterPlusOffset (m_lldb_sp_regnum, row->GetCFAValue().GetOffset()); + row->GetCFAValue().SetOffset (current_sp_bytes_offset_from_cfa); + + in_epilogue = true; + row_updated = true; + } + else if (mov_reg_to_local_stack_frame_p (machine_regno, stack_offset) && nonvolatile_reg_p (machine_regno) && machine_regno_to_lldb_regno (machine_regno, lldb_regno) @@ -1137,15 +1168,15 @@ AssemblyParse_x86::augment_unwind_plan_from_call_site (AddressRange& func, Unwin // The only case we care about is epilogue: // [0x5d] pop %rbp/%ebp // => [0xc3] ret - if (pop_rbp_pattern_p ()) + if (pop_rbp_pattern_p () || leave_pattern_p ()) { if (target->ReadMemory (m_cur_insn, prefer_file_cache, m_cur_insn_bytes, 1, error) != static_cast<size_t>(-1) && ret_pattern_p ()) { row->SetOffset (offset); - row->GetCFAValue().SetIsRegisterPlusOffset ( - first_row->GetCFAValue().GetRegisterNumber(), m_wordsize); + row->GetCFAValue().SetIsRegisterPlusOffset (first_row->GetCFAValue().GetRegisterNumber(), + m_wordsize); UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row)); unwind_plan.InsertRow (new_row); diff --git a/contrib/llvm/tools/lldb/source/Symbol/ClangASTContext.cpp b/contrib/llvm/tools/lldb/source/Symbol/ClangASTContext.cpp index 8b11c23..621bd16 100644 --- a/contrib/llvm/tools/lldb/source/Symbol/ClangASTContext.cpp +++ b/contrib/llvm/tools/lldb/source/Symbol/ClangASTContext.cpp @@ -4158,6 +4158,7 @@ ClangASTContext::GetTypeClass (lldb::opaque_compiler_type_t type) case clang::Type::Decltype: break; case clang::Type::TemplateSpecialization: break; case clang::Type::Atomic: break; + case clang::Type::Pipe: break; // pointer type decayed from an array or function type. case clang::Type::Decayed: break; @@ -4891,6 +4892,7 @@ ClangASTContext::GetEncoding (lldb::opaque_compiler_type_t type, uint64_t &count case clang::Type::TemplateSpecialization: case clang::Type::Atomic: case clang::Type::Adjusted: + case clang::Type::Pipe: break; // pointer type decayed from an array or function type. @@ -5008,6 +5010,7 @@ ClangASTContext::GetFormat (lldb::opaque_compiler_type_t type) case clang::Type::TemplateSpecialization: case clang::Type::Atomic: case clang::Type::Adjusted: + case clang::Type::Pipe: break; // pointer type decayed from an array or function type. 
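Returning to the UnwindAssembly-x86 change above: the new leave_pattern_p predicate, together with the existing pop_rbp_pattern_p and ret_pattern_p, amounts to matching one-byte opcodes. The sketch below is illustrative only, not patch code; IsSimpleEpilogue is a hypothetical helper that folds the three predicates into a single check.

#include <cstddef>
#include <cstdint>

// One-byte x86 opcodes involved: 0x5d = pop %rbp, 0xc9 = leave, 0xc3 = ret.
// Returns true if the two bytes at insn form an epilogue of the shape
// "pop %rbp; ret" or "leave; ret".
bool
IsSimpleEpilogue(const std::uint8_t *insn, std::size_t len)
{
    if (len < 2)
        return false;
    const bool restores_rbp = insn[0] == 0x5d || insn[0] == 0xc9;
    return restores_rbp && insn[1] == 0xc3;
}

This is the same shape augment_unwind_plan_from_call_site looks for, except that the real code reads the two instructions from target memory one at a time.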
@@ -9969,6 +9972,18 @@ ClangASTContext::DeclContextGetName (void *opaque_decl_ctx) return ConstString(); } +ConstString +ClangASTContext::DeclContextGetScopeQualifiedName (void *opaque_decl_ctx) +{ + if (opaque_decl_ctx) + { + clang::NamedDecl *named_decl = llvm::dyn_cast<clang::NamedDecl>((clang::DeclContext *)opaque_decl_ctx); + if (named_decl) + return ConstString(llvm::StringRef(named_decl->getQualifiedNameAsString())); + } + return ConstString(); +} + bool ClangASTContext::DeclContextIsClassMethod (void *opaque_decl_ctx, lldb::LanguageType *language_ptr, diff --git a/contrib/llvm/tools/lldb/source/Symbol/CompilerDeclContext.cpp b/contrib/llvm/tools/lldb/source/Symbol/CompilerDeclContext.cpp index e44cee6..8bee1b4 100644 --- a/contrib/llvm/tools/lldb/source/Symbol/CompilerDeclContext.cpp +++ b/contrib/llvm/tools/lldb/source/Symbol/CompilerDeclContext.cpp @@ -38,6 +38,15 @@ CompilerDeclContext::GetName () const return ConstString(); } +ConstString +CompilerDeclContext::GetScopeQualifiedName () const +{ + if (IsValid()) + return m_type_system->DeclContextGetScopeQualifiedName(m_opaque_decl_ctx); + else + return ConstString(); +} + bool CompilerDeclContext::IsStructUnionOrClass () const { diff --git a/contrib/llvm/tools/lldb/source/Symbol/LineTable.cpp b/contrib/llvm/tools/lldb/source/Symbol/LineTable.cpp index 01c1718..f9a42a7 100644 --- a/contrib/llvm/tools/lldb/source/Symbol/LineTable.cpp +++ b/contrib/llvm/tools/lldb/source/Symbol/LineTable.cpp @@ -143,6 +143,13 @@ LineTable::InsertSequence (LineSequence* sequence) entry_collection::iterator end_pos = m_entries.end(); LineTable::Entry::LessThanBinaryPredicate less_than_bp(this); entry_collection::iterator pos = upper_bound(begin_pos, end_pos, entry, less_than_bp); + + // We should never insert a sequence in the middle of another sequence + if (pos != begin_pos) { + while (pos < end_pos && !((pos - 1)->is_terminal_entry)) + pos++; + } + #ifdef LLDB_CONFIGURATION_DEBUG // If we aren't inserting at the beginning, the previous entry should // terminate a sequence. 
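The LineTable::InsertSequence fix above can be isolated into a small, self-contained rule. The sketch below is illustrative only; ToyEntry and FixupInsertPos are hypothetical reductions of LineTable::Entry and the loop added by the patch.

#include <vector>

// Hypothetical reduction of LineTable::Entry to the one flag the fix needs.
struct ToyEntry
{
    unsigned file_addr;
    bool is_terminal_entry; // true for the entry that closes a sequence
};

// Mirrors the added loop: after upper_bound picks an insertion point, slide
// it forward until the entry just before it terminates a sequence, so a new
// sequence is never inserted into the middle of an existing one.
std::vector<ToyEntry>::iterator
FixupInsertPos(std::vector<ToyEntry> &entries, std::vector<ToyEntry>::iterator pos)
{
    if (pos != entries.begin())
    {
        while (pos != entries.end() && !(pos - 1)->is_terminal_entry)
            ++pos;
    }
    return pos;
}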
diff --git a/contrib/llvm/tools/lldb/source/Symbol/SymbolFile.cpp b/contrib/llvm/tools/lldb/source/Symbol/SymbolFile.cpp index 51e3504..82bbceb 100644 --- a/contrib/llvm/tools/lldb/source/Symbol/SymbolFile.cpp +++ b/contrib/llvm/tools/lldb/source/Symbol/SymbolFile.cpp @@ -134,6 +134,12 @@ SymbolFile::FindFunctions (const RegularExpression& regex, bool include_inlines, return 0; } +void +SymbolFile::GetMangledNamesForFunction(const std::string &scope_qualified_name, std::vector<ConstString> &mangled_names) +{ + return; +} + uint32_t SymbolFile::FindTypes (const SymbolContext& sc, const ConstString &name, const CompilerDeclContext *parent_decl_ctx, bool append, uint32_t max_matches, TypeMap& types) { diff --git a/contrib/llvm/tools/lldb/source/Target/Process.cpp b/contrib/llvm/tools/lldb/source/Target/Process.cpp index 311c695..e4fe419 100644 --- a/contrib/llvm/tools/lldb/source/Target/Process.cpp +++ b/contrib/llvm/tools/lldb/source/Target/Process.cpp @@ -6515,3 +6515,65 @@ Process::ResetImageToken(size_t token) if (token < m_image_tokens.size()) m_image_tokens[token] = LLDB_INVALID_IMAGE_TOKEN; } + +Address +Process::AdvanceAddressToNextBranchInstruction (Address default_stop_addr, AddressRange range_bounds) +{ + Target &target = GetTarget(); + DisassemblerSP disassembler_sp; + InstructionList *insn_list = NULL; + + Address retval = default_stop_addr; + + if (target.GetUseFastStepping() == false) + return retval; + if (default_stop_addr.IsValid() == false) + return retval; + + ExecutionContext exe_ctx (this); + const char *plugin_name = nullptr; + const char *flavor = nullptr; + const bool prefer_file_cache = true; + disassembler_sp = Disassembler::DisassembleRange(target.GetArchitecture(), + plugin_name, + flavor, + exe_ctx, + range_bounds, + prefer_file_cache); + if (disassembler_sp.get()) + insn_list = &disassembler_sp->GetInstructionList(); + + if (insn_list == NULL) + { + return retval; + } + + size_t insn_offset = insn_list->GetIndexOfInstructionAtAddress (default_stop_addr); + if (insn_offset == UINT32_MAX) + { + return retval; + } + + uint32_t branch_index = insn_list->GetIndexOfNextBranchInstruction (insn_offset, target); + if (branch_index == UINT32_MAX) + { + return retval; + } + + if (branch_index > insn_offset) + { + Address next_branch_insn_address = insn_list->GetInstructionAtIndex (branch_index)->GetAddress(); + if (next_branch_insn_address.IsValid() && range_bounds.ContainsFileAddress (next_branch_insn_address)) + { + retval = next_branch_insn_address; + } + } + + if (disassembler_sp.get()) + { + // FIXME: The DisassemblerLLVMC has a reference cycle and won't go away if it has any active instructions. 
+ disassembler_sp->GetInstructionList().Clear(); + } + + return retval; +} diff --git a/contrib/llvm/tools/lldb/source/Target/Thread.cpp b/contrib/llvm/tools/lldb/source/Target/Thread.cpp index 9f9da97..551e480 100644 --- a/contrib/llvm/tools/lldb/source/Target/Thread.cpp +++ b/contrib/llvm/tools/lldb/source/Target/Thread.cpp @@ -1591,7 +1591,7 @@ Thread::QueueThreadPlanForStepOut(bool abort_other_plans, Vote stop_vote, Vote run_vote, uint32_t frame_idx, - LazyBool step_out_avoids_code_withoug_debug_info) + LazyBool step_out_avoids_code_without_debug_info) { ThreadPlanSP thread_plan_sp (new ThreadPlanStepOut (*this, addr_context, @@ -1600,7 +1600,7 @@ Thread::QueueThreadPlanForStepOut(bool abort_other_plans, stop_vote, run_vote, frame_idx, - step_out_avoids_code_withoug_debug_info)); + step_out_avoids_code_without_debug_info)); if (thread_plan_sp->ValidatePlan(nullptr)) { @@ -1620,7 +1620,8 @@ Thread::QueueThreadPlanForStepOutNoShouldStop(bool abort_other_plans, bool stop_other_threads, Vote stop_vote, Vote run_vote, - uint32_t frame_idx) + uint32_t frame_idx, + bool continue_to_next_branch) { ThreadPlanSP thread_plan_sp(new ThreadPlanStepOut (*this, addr_context, @@ -1629,7 +1630,8 @@ Thread::QueueThreadPlanForStepOutNoShouldStop(bool abort_other_plans, stop_vote, run_vote, frame_idx, - eLazyBoolNo)); + eLazyBoolNo, + continue_to_next_branch)); ThreadPlanStepOut *new_plan = static_cast<ThreadPlanStepOut *>(thread_plan_sp.get()); new_plan->ClearShouldStopHereCallbacks(); diff --git a/contrib/llvm/tools/lldb/source/Target/ThreadPlanShouldStopHere.cpp b/contrib/llvm/tools/lldb/source/Target/ThreadPlanShouldStopHere.cpp index 88f8db2..55aaaf0 100644 --- a/contrib/llvm/tools/lldb/source/Target/ThreadPlanShouldStopHere.cpp +++ b/contrib/llvm/tools/lldb/source/Target/ThreadPlanShouldStopHere.cpp @@ -135,7 +135,8 @@ ThreadPlanShouldStopHere::DefaultStepFromHereCallback (ThreadPlan *current_plan, stop_others, eVoteNo, eVoteNoOpinion, - frame_index); + frame_index, + true); return return_plan_sp; } diff --git a/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOut.cpp b/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOut.cpp index 92403cb..82b823b 100644 --- a/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOut.cpp +++ b/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOut.cpp @@ -18,6 +18,7 @@ #include "lldb/Core/ValueObjectConstResult.h" #include "lldb/Symbol/Block.h" #include "lldb/Symbol/Function.h" +#include "lldb/Symbol/Symbol.h" #include "lldb/Symbol/Type.h" #include "lldb/Target/ABI.h" #include "lldb/Target/Process.h" @@ -44,7 +45,8 @@ ThreadPlanStepOut::ThreadPlanStepOut Vote stop_vote, Vote run_vote, uint32_t frame_idx, - LazyBool step_out_avoids_code_without_debug_info + LazyBool step_out_avoids_code_without_debug_info, + bool continue_to_next_branch ) : ThreadPlan (ThreadPlan::eKindStepOut, "Step out", thread, stop_vote, run_vote), ThreadPlanShouldStopHere (this), @@ -86,7 +88,8 @@ ThreadPlanStepOut::ThreadPlanStepOut eVoteNoOpinion, eVoteNoOpinion, frame_idx - 1, - eLazyBoolNo)); + eLazyBoolNo, + continue_to_next_branch)); static_cast<ThreadPlanStepOut *>(m_step_out_to_inline_plan_sp.get())->SetShouldStopHereCallbacks(nullptr, nullptr); m_step_out_to_inline_plan_sp->SetPrivate(true); } @@ -101,7 +104,27 @@ ThreadPlanStepOut::ThreadPlanStepOut // Find the return address and set a breakpoint there: // FIXME - can we do this more securely if we know first_insn? 
- m_return_addr = return_frame_sp->GetFrameCodeAddress().GetLoadAddress(&m_thread.GetProcess()->GetTarget()); + Address return_address (return_frame_sp->GetFrameCodeAddress()); + if (continue_to_next_branch) + { + SymbolContext return_address_sc; + AddressRange range; + Address return_address_decr_pc = return_address; + if (return_address_decr_pc.GetOffset() > 0) + return_address_decr_pc.Slide (-1); + + return_address_decr_pc.CalculateSymbolContext (&return_address_sc, lldb::eSymbolContextLineEntry); + if (return_address_sc.line_entry.IsValid()) + { + range = return_address_sc.line_entry.GetSameLineContiguousAddressRange(); + if (range.GetByteSize() > 0) + { + return_address = m_thread.GetProcess()->AdvanceAddressToNextBranchInstruction (return_address, + range); + } + } + } + m_return_addr = return_address.GetLoadAddress(&m_thread.GetProcess()->GetTarget()); if (m_return_addr == LLDB_INVALID_ADDRESS) return; diff --git a/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOverRange.cpp b/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOverRange.cpp index 08655be..2e731a8 100644 --- a/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOverRange.cpp +++ b/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOverRange.cpp @@ -185,7 +185,8 @@ ThreadPlanStepOverRange::ShouldStop (Event *event_ptr) stop_others, eVoteNo, eVoteNoOpinion, - 0); + 0, + true); break; } else diff --git a/contrib/llvm/tools/lli/ChildTarget/ChildTarget.cpp b/contrib/llvm/tools/lli/ChildTarget/ChildTarget.cpp index 6c537d4..0b75e20 100644 --- a/contrib/llvm/tools/lli/ChildTarget/ChildTarget.cpp +++ b/contrib/llvm/tools/lli/ChildTarget/ChildTarget.cpp @@ -1,244 +1,69 @@ -#include "llvm/Config/config.h" -#include "../RPCChannel.h" -#include "../RemoteTarget.h" -#include "../RemoteTargetMessage.h" -#include "llvm/Support/Memory.h" -#include <assert.h> -#include <map> -#include <stdint.h> -#include <string> -#include <vector> +#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h" +#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/Process.h" +#include <sstream> -using namespace llvm; +#include "../RemoteJITUtils.h" -class LLIChildTarget { -public: - void initialize(); - LLIMessageType waitForIncomingMessage(); - void handleMessage(LLIMessageType messageType); - RemoteTarget *RT; - RPCChannel RPC; +using namespace llvm; +using namespace llvm::orc; +using namespace llvm::sys; -private: - // Incoming message handlers - void handleAllocateSpace(); - void handleLoadSection(bool IsCode); - void handleExecute(); +#ifdef __x86_64__ +typedef OrcX86_64 HostOrcArch; +#else +typedef OrcGenericArchitecture HostOrcArch; +#endif - // Outgoing message handlers - void sendChildActive(); - void sendAllocationResult(uint64_t Addr); - void sendLoadStatus(uint32_t Status); - void sendExecutionComplete(int Result); +int main(int argc, char *argv[]) { - // OS-specific functions - void initializeConnection(); - int WriteBytes(const void *Data, size_t Size) { - return RPC.WriteBytes(Data, Size) ? Size : -1; + if (argc != 3) { + errs() << "Usage: " << argv[0] << " <input fd> <output fd>\n"; + return 1; } - int ReadBytes(void *Data, size_t Size) { - return RPC.ReadBytes(Data, Size) ? 
Size : -1; - } - - // Communication handles (OS-specific) - void *ConnectionData; -}; - -int main() { - LLIChildTarget ThisChild; - ThisChild.RT = new RemoteTarget(); - ThisChild.initialize(); - LLIMessageType MsgType; - do { - MsgType = ThisChild.waitForIncomingMessage(); - ThisChild.handleMessage(MsgType); - } while (MsgType != LLI_Terminate && - MsgType != LLI_Error); - delete ThisChild.RT; - return 0; -} -// Public methods -void LLIChildTarget::initialize() { - RPC.createClient(); - sendChildActive(); -} + int InFD; + int OutFD; + { + std::istringstream InFDStream(argv[1]), OutFDStream(argv[2]); + InFDStream >> InFD; + OutFDStream >> OutFD; + } -LLIMessageType LLIChildTarget::waitForIncomingMessage() { - int32_t MsgType = -1; - if (ReadBytes(&MsgType, 4) > 0) - return (LLIMessageType)MsgType; - return LLI_Error; -} + if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr)) { + errs() << "Error loading program symbols.\n"; + return 1; + } -void LLIChildTarget::handleMessage(LLIMessageType messageType) { - switch (messageType) { - case LLI_AllocateSpace: - handleAllocateSpace(); - break; - case LLI_LoadCodeSection: - handleLoadSection(true); - break; - case LLI_LoadDataSection: - handleLoadSection(false); - break; - case LLI_Execute: - handleExecute(); - break; - case LLI_Terminate: - RT->stop(); - break; + auto SymbolLookup = [](const std::string &Name) { + return RTDyldMemoryManager::getSymbolAddressInProcess(Name); + }; + + FDRPCChannel Channel(InFD, OutFD); + typedef remote::OrcRemoteTargetServer<FDRPCChannel, HostOrcArch> JITServer; + JITServer Server(Channel, SymbolLookup); + + while (1) { + JITServer::JITProcId Id = JITServer::InvalidId; + if (auto EC = Server.getNextProcId(Id)) { + errs() << "Error: " << EC.message() << "\n"; + return 1; + } + switch (Id) { + case JITServer::TerminateSessionId: + return 0; default: - // FIXME: Handle error! - break; + if (auto EC = Server.handleKnownProcedure(Id)) { + errs() << "Error: " << EC.message() << "\n"; + return 1; + } + } } -} - -// Incoming message handlers -void LLIChildTarget::handleAllocateSpace() { - // Read and verify the message data size. - uint32_t DataSize = 0; - int rc = ReadBytes(&DataSize, 4); - (void)rc; - assert(rc == 4); - assert(DataSize == 8); - - // Read the message arguments. - uint32_t Alignment = 0; - uint32_t AllocSize = 0; - rc = ReadBytes(&Alignment, 4); - assert(rc == 4); - rc = ReadBytes(&AllocSize, 4); - assert(rc == 4); - - // Allocate the memory. - uint64_t Addr; - RT->allocateSpace(AllocSize, Alignment, Addr); - - // Send AllocationResult message. - sendAllocationResult(Addr); -} - -void LLIChildTarget::handleLoadSection(bool IsCode) { - // Read the message data size. - uint32_t DataSize = 0; - int rc = ReadBytes(&DataSize, 4); - (void)rc; - assert(rc == 4); - - // Read the target load address. - uint64_t Addr = 0; - rc = ReadBytes(&Addr, 8); - assert(rc == 8); - size_t BufferSize = DataSize - 8; - - if (!RT->isAllocatedMemory(Addr, BufferSize)) - return sendLoadStatus(LLI_Status_NotAllocated); - - // Read section data into previously allocated buffer - rc = ReadBytes((void*)Addr, BufferSize); - if (rc != (int)(BufferSize)) - return sendLoadStatus(LLI_Status_IncompleteMsg); - - // If IsCode, mark memory executable - if (IsCode) - sys::Memory::InvalidateInstructionCache((void *)Addr, BufferSize); - - // Send MarkLoadComplete message. - sendLoadStatus(LLI_Status_Success); -} - -void LLIChildTarget::handleExecute() { - // Read the message data size. 
- uint32_t DataSize = 0; - int rc = ReadBytes(&DataSize, 4); - (void)rc; - assert(rc == 4); - assert(DataSize == 8); - - // Read the target address. - uint64_t Addr = 0; - rc = ReadBytes(&Addr, 8); - assert(rc == 8); - - // Call function - int32_t Result = -1; - RT->executeCode(Addr, Result); - - // Send ExecutionResult message. - sendExecutionComplete(Result); -} - -// Outgoing message handlers -void LLIChildTarget::sendChildActive() { - // Write the message type. - uint32_t MsgType = (uint32_t)LLI_ChildActive; - int rc = WriteBytes(&MsgType, 4); - (void)rc; - assert(rc == 4); - - // Write the data size. - uint32_t DataSize = 0; - rc = WriteBytes(&DataSize, 4); - assert(rc == 4); -} - -void LLIChildTarget::sendAllocationResult(uint64_t Addr) { - // Write the message type. - uint32_t MsgType = (uint32_t)LLI_AllocationResult; - int rc = WriteBytes(&MsgType, 4); - (void)rc; - assert(rc == 4); - - // Write the data size. - uint32_t DataSize = 8; - rc = WriteBytes(&DataSize, 4); - assert(rc == 4); - - // Write the allocated address. - rc = WriteBytes(&Addr, 8); - assert(rc == 8); -} - -void LLIChildTarget::sendLoadStatus(uint32_t Status) { - // Write the message type. - uint32_t MsgType = (uint32_t)LLI_LoadResult; - int rc = WriteBytes(&MsgType, 4); - (void)rc; - assert(rc == 4); - - // Write the data size. - uint32_t DataSize = 4; - rc = WriteBytes(&DataSize, 4); - assert(rc == 4); - - // Write the result. - rc = WriteBytes(&Status, 4); - assert(rc == 4); -} - -void LLIChildTarget::sendExecutionComplete(int Result) { - // Write the message type. - uint32_t MsgType = (uint32_t)LLI_ExecutionResult; - int rc = WriteBytes(&MsgType, 4); - (void)rc; - assert(rc == 4); + close(InFD); + close(OutFD); - // Write the data size. - uint32_t DataSize = 4; - rc = WriteBytes(&DataSize, 4); - assert(rc == 4); - - // Write the result. - rc = WriteBytes(&Result, 4); - assert(rc == 4); + return 0; } - -#ifdef LLVM_ON_UNIX -#include "../Unix/RPCChannel.inc" -#endif - -#ifdef LLVM_ON_WIN32 -#include "../Windows/RPCChannel.inc" -#endif diff --git a/contrib/llvm/tools/lli/OrcLazyJIT.cpp b/contrib/llvm/tools/lli/OrcLazyJIT.cpp index 4235145..7f483f7 100644 --- a/contrib/llvm/tools/lli/OrcLazyJIT.cpp +++ b/contrib/llvm/tools/lli/OrcLazyJIT.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "OrcLazyJIT.h" -#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h" +#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DynamicLibrary.h" #include <cstdio> diff --git a/contrib/llvm/tools/lli/OrcLazyJIT.h b/contrib/llvm/tools/lli/OrcLazyJIT.h index bb4da33..2a5b31d 100644 --- a/contrib/llvm/tools/lli/OrcLazyJIT.h +++ b/contrib/llvm/tools/lli/OrcLazyJIT.h @@ -105,7 +105,9 @@ public: // Add the module to the JIT. std::vector<std::unique_ptr<Module>> S; S.push_back(std::move(M)); - auto H = CODLayer.addModuleSet(std::move(S), nullptr, std::move(Resolver)); + auto H = CODLayer.addModuleSet(std::move(S), + llvm::make_unique<SectionMemoryManager>(), + std::move(Resolver)); // Run the static constructors, and save the static destructor runner for // execution when the JIT is torn down. 
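The lli changes that follow replace the hand-rolled RemoteTarget/RPCChannel machinery (deleted below) with Orc's remote-target client, for which the rewritten ChildTarget above serves as the other end. A minimal client-side sketch, assuming only names this patch introduces (launchRemote and FDRPCChannel appear below in RemoteJITUtils.h and lli.cpp, EE is lli's ExecutionEngine, error handling elided):

    // Start the ChildTarget server over a pipe pair, JIT locally, then run the
    // resulting int(void) entry point inside the child process.
    std::unique_ptr<FDRPCChannel> C = launchRemote();
    typedef orc::remote::OrcRemoteTargetClient<orc::remote::RPCChannel> MyRemote;
    ErrorOr<MyRemote> R = MyRemote::Create(*C);   // handshake with the child
    orc::TargetAddress Entry = EE->getFunctionAddress("main");
    EE->finalizeObject();                         // sections land in the child
    int Result = 0;
    R->callIntVoid(Result, Entry);                // executes in the child
    R->terminateSession();                        // ends the child's serve loop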
diff --git a/contrib/llvm/tools/lli/RPCChannel.h b/contrib/llvm/tools/lli/RPCChannel.h
deleted file mode 100644
index ebd3c65..0000000
--- a/contrib/llvm/tools/lli/RPCChannel.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===---------- RPCChannel.h - LLVM out-of-process JIT execution ----------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Definition of the RemoteTargetExternal class which executes JITed code in a
-// separate process from where it was built.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLI_RPCCHANNEL_H
-#define LLVM_TOOLS_LLI_RPCCHANNEL_H
-
-#include <stdlib.h>
-#include <string>
-
-namespace llvm {
-
-class RPCChannel {
-public:
-  std::string ChildName;
-
-  RPCChannel() {}
-  ~RPCChannel();
-
-  /// Start the remote process.
-  ///
-  /// @returns True on success. On failure, ErrorMsg is updated with
-  /// descriptive text of the encountered error.
-  bool createServer();
-
-  bool createClient();
-
-  // This will get filled in as a point to an OS-specific structure.
-  void *ConnectionData;
-
-  bool WriteBytes(const void *Data, size_t Size);
-  bool ReadBytes(void *Data, size_t Size);
-
-  void Wait();
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/tools/lli/RemoteJITUtils.h b/contrib/llvm/tools/lli/RemoteJITUtils.h
new file mode 100644
index 0000000..a3f3fa0
--- /dev/null
+++ b/contrib/llvm/tools/lli/RemoteJITUtils.h
@@ -0,0 +1,131 @@
+//===-- RemoteJITUtils.h - Utilities for remote-JITing with LLI -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for remote-JITing with LLI.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLI_REMOTEJITUTILS_H
+#define LLVM_TOOLS_LLI_REMOTEJITUTILS_H
+
+#include "llvm/ExecutionEngine/Orc/RPCChannel.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
+
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+
+/// RPC channel that reads from and writes to file descriptors.
+class FDRPCChannel : public llvm::orc::remote::RPCChannel {
+public:
+  FDRPCChannel(int InFD, int OutFD) : InFD(InFD), OutFD(OutFD) {}
+
+  std::error_code readBytes(char *Dst, unsigned Size) override {
+    assert(Dst && "Attempt to read into null.");
+    ssize_t ReadResult = ::read(InFD, Dst, Size);
+    if (ReadResult != (ssize_t)Size)
+      return std::error_code(errno, std::generic_category());
+    return std::error_code();
+  }
+
+  std::error_code appendBytes(const char *Src, unsigned Size) override {
+    assert(Src && "Attempt to append from null.");
+    ssize_t WriteResult = ::write(OutFD, Src, Size);
+    if (WriteResult != (ssize_t)Size)
+      return std::error_code(errno, std::generic_category());
+    return std::error_code();
+  }
+
+  std::error_code send() override { return std::error_code(); }
+
+private:
+  int InFD, OutFD;
+};
+
+// Launch the remote process (see lli.cpp) and return a channel to it.
+std::unique_ptr<FDRPCChannel> launchRemote();
+
+namespace llvm {
+
+// ForwardingMM - Adapter to connect MCJIT to Orc's Remote memory manager.
+class ForwardingMemoryManager : public llvm::RTDyldMemoryManager { +public: + void setMemMgr(std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr) { + this->MemMgr = std::move(MemMgr); + } + + void setResolver(std::unique_ptr<RuntimeDyld::SymbolResolver> Resolver) { + this->Resolver = std::move(Resolver); + } + + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + StringRef SectionName) override { + return MemMgr->allocateCodeSection(Size, Alignment, SectionID, SectionName); + } + + uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, StringRef SectionName, + bool IsReadOnly) override { + return MemMgr->allocateDataSection(Size, Alignment, SectionID, SectionName, + IsReadOnly); + } + + void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, + uintptr_t RODataSize, uint32_t RODataAlign, + uintptr_t RWDataSize, + uint32_t RWDataAlign) override { + MemMgr->reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, + RWDataSize, RWDataAlign); + } + + bool needsToReserveAllocationSpace() override { + return MemMgr->needsToReserveAllocationSpace(); + } + + void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, + size_t Size) override { + MemMgr->registerEHFrames(Addr, LoadAddr, Size); + } + + void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, + size_t Size) override { + MemMgr->deregisterEHFrames(Addr, LoadAddr, Size); + } + + bool finalizeMemory(std::string *ErrMsg = nullptr) override { + return MemMgr->finalizeMemory(ErrMsg); + } + + void notifyObjectLoaded(RuntimeDyld &RTDyld, + const object::ObjectFile &Obj) override { + MemMgr->notifyObjectLoaded(RTDyld, Obj); + } + + // Don't hide the sibling notifyObjectLoaded from RTDyldMemoryManager. + using RTDyldMemoryManager::notifyObjectLoaded; + + RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override { + return Resolver->findSymbol(Name); + } + + RuntimeDyld::SymbolInfo + findSymbolInLogicalDylib(const std::string &Name) override { + return Resolver->findSymbolInLogicalDylib(Name); + } + +private: + std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr; + std::unique_ptr<RuntimeDyld::SymbolResolver> Resolver; +}; +} + +#endif diff --git a/contrib/llvm/tools/lli/RemoteMemoryManager.cpp b/contrib/llvm/tools/lli/RemoteMemoryManager.cpp deleted file mode 100644 index 0a16210..0000000 --- a/contrib/llvm/tools/lli/RemoteMemoryManager.cpp +++ /dev/null @@ -1,174 +0,0 @@ -//===---- RemoteMemoryManager.cpp - Recording memory manager --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This memory manager allocates local storage and keeps a record of each -// allocation. Iterators are provided for all data and code allocations. 
-// -//===----------------------------------------------------------------------===// - -#include "RemoteMemoryManager.h" -#include "llvm/ExecutionEngine/ExecutionEngine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "lli" - -RemoteMemoryManager::~RemoteMemoryManager() { - for (SmallVector<Allocation, 2>::iterator - I = AllocatedSections.begin(), E = AllocatedSections.end(); - I != E; ++I) - sys::Memory::releaseMappedMemory(I->MB); -} - -uint8_t *RemoteMemoryManager:: -allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, - StringRef SectionName) { - // The recording memory manager is just a local copy of the remote target. - // The alignment requirement is just stored here for later use. Regular - // heap storage is sufficient here, but we're using mapped memory to work - // around a bug in MCJIT. - sys::MemoryBlock Block = allocateSection(Size); - // AllocatedSections will own this memory. - AllocatedSections.push_back( Allocation(Block, Alignment, true) ); - // UnmappedSections has the same information but does not own the memory. - UnmappedSections.push_back( Allocation(Block, Alignment, true) ); - return (uint8_t*)Block.base(); -} - -uint8_t *RemoteMemoryManager:: -allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, StringRef SectionName, - bool IsReadOnly) { - // The recording memory manager is just a local copy of the remote target. - // The alignment requirement is just stored here for later use. Regular - // heap storage is sufficient here, but we're using mapped memory to work - // around a bug in MCJIT. - sys::MemoryBlock Block = allocateSection(Size); - // AllocatedSections will own this memory. - AllocatedSections.push_back( Allocation(Block, Alignment, false) ); - // UnmappedSections has the same information but does not own the memory. - UnmappedSections.push_back( Allocation(Block, Alignment, false) ); - return (uint8_t*)Block.base(); -} - -sys::MemoryBlock RemoteMemoryManager::allocateSection(uintptr_t Size) { - std::error_code ec; - sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(Size, - &Near, - sys::Memory::MF_READ | - sys::Memory::MF_WRITE, - ec); - assert(!ec && MB.base()); - - // FIXME: This is part of a work around to keep sections near one another - // when MCJIT performs relocations after code emission but before - // the generated code is moved to the remote target. - // Save this address as the basis for our next request - Near = MB; - return MB; -} - -void RemoteMemoryManager::notifyObjectLoaded(ExecutionEngine *EE, - const object::ObjectFile &Obj) { - // The client should have called setRemoteTarget() before triggering any - // code generation. - assert(Target); - if (!Target) - return; - - // FIXME: Make this function thread safe. - - // Lay out our sections in order, with all the code sections first, then - // all the data sections. - uint64_t CurOffset = 0; - unsigned MaxAlign = Target->getPageAlignment(); - SmallVector<std::pair<Allocation, uint64_t>, 16> Offsets; - unsigned NumSections = UnmappedSections.size(); - // We're going to go through the list twice to separate code and data, but - // it's a very small list, so that's OK. 
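The two layout passes below rely on the standard integer align-up idiom: with CurOffset = 13 and Align = 8, (13 + 8 - 1) / 8 * 8 = 16. The same computation as a stand-alone helper (illustrative only, not part of the patch):

    // Round Offset up to the next multiple of Align; Align must be non-zero.
    static uint64_t alignTo(uint64_t Offset, uint64_t Align) {
      return (Offset + Align - 1) / Align * Align;
    }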
- for (size_t i = 0, e = NumSections; i != e; ++i) { - Allocation &Section = UnmappedSections[i]; - if (Section.IsCode) { - unsigned Size = Section.MB.size(); - unsigned Align = Section.Alignment; - DEBUG(dbgs() << "code region: size " << Size - << ", alignment " << Align << "\n"); - // Align the current offset up to whatever is needed for the next - // section. - CurOffset = (CurOffset + Align - 1) / Align * Align; - // Save off the address of the new section and allocate its space. - Offsets.push_back(std::pair<Allocation,uint64_t>(Section, CurOffset)); - CurOffset += Size; - } - } - // Adjust to keep code and data aligned on separate pages. - CurOffset = (CurOffset + MaxAlign - 1) / MaxAlign * MaxAlign; - for (size_t i = 0, e = NumSections; i != e; ++i) { - Allocation &Section = UnmappedSections[i]; - if (!Section.IsCode) { - unsigned Size = Section.MB.size(); - unsigned Align = Section.Alignment; - DEBUG(dbgs() << "data region: size " << Size - << ", alignment " << Align << "\n"); - // Align the current offset up to whatever is needed for the next - // section. - CurOffset = (CurOffset + Align - 1) / Align * Align; - // Save off the address of the new section and allocate its space. - Offsets.push_back(std::pair<Allocation,uint64_t>(Section, CurOffset)); - CurOffset += Size; - } - } - - // Allocate space in the remote target. - uint64_t RemoteAddr; - if (!Target->allocateSpace(CurOffset, MaxAlign, RemoteAddr)) - report_fatal_error(Target->getErrorMsg()); - - // Map the section addresses so relocations will get updated in the local - // copies of the sections. - for (unsigned i = 0, e = Offsets.size(); i != e; ++i) { - uint64_t Addr = RemoteAddr + Offsets[i].second; - EE->mapSectionAddress(const_cast<void*>(Offsets[i].first.MB.base()), Addr); - - DEBUG(dbgs() << " Mapping local: " << Offsets[i].first.MB.base() - << " to remote: 0x" << format("%llx", Addr) << "\n"); - - MappedSections[Addr] = Offsets[i].first; - } - - UnmappedSections.clear(); -} - -bool RemoteMemoryManager::finalizeMemory(std::string *ErrMsg) { - // FIXME: Make this function thread safe. - for (DenseMap<uint64_t, Allocation>::iterator - I = MappedSections.begin(), E = MappedSections.end(); - I != E; ++I) { - uint64_t RemoteAddr = I->first; - const Allocation &Section = I->second; - if (Section.IsCode) { - if (!Target->loadCode(RemoteAddr, Section.MB.base(), Section.MB.size())) - report_fatal_error(Target->getErrorMsg()); - DEBUG(dbgs() << " loading code: " << Section.MB.base() - << " to remote: 0x" << format("%llx", RemoteAddr) << "\n"); - } else { - if (!Target->loadData(RemoteAddr, Section.MB.base(), Section.MB.size())) - report_fatal_error(Target->getErrorMsg()); - DEBUG(dbgs() << " loading data: " << Section.MB.base() - << " to remote: 0x" << format("%llx", RemoteAddr) << "\n"); - } - } - - MappedSections.clear(); - - return false; -} diff --git a/contrib/llvm/tools/lli/RemoteMemoryManager.h b/contrib/llvm/tools/lli/RemoteMemoryManager.h deleted file mode 100644 index 5733fa5..0000000 --- a/contrib/llvm/tools/lli/RemoteMemoryManager.h +++ /dev/null @@ -1,101 +0,0 @@ -//===- RemoteMemoryManager.h - LLI MCJIT recording memory manager ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This memory manager allocates local storage and keeps a record of each -// allocation. 
Iterators are provided for all data and code allocations. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLI_REMOTEMEMORYMANAGER_H -#define LLVM_TOOLS_LLI_REMOTEMEMORYMANAGER_H - -#include "RemoteTarget.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Memory.h" -#include <utility> - -namespace llvm { - -class RemoteMemoryManager : public RTDyldMemoryManager { -public: - // Notice that this structure takes ownership of the memory allocated. - struct Allocation { - Allocation() {} - Allocation(sys::MemoryBlock mb, unsigned a, bool code) - : MB(mb), Alignment(a), IsCode(code) {} - - sys::MemoryBlock MB; - unsigned Alignment; - bool IsCode; - }; - -private: - // This vector contains Allocation objects for all sections which we have - // allocated. This vector effectively owns the memory associated with the - // allocations. - SmallVector<Allocation, 2> AllocatedSections; - - // This vector contains pointers to Allocation objects for any sections we - // have allocated locally but have not yet remapped for the remote target. - // When we receive notification of a completed module load, we will map - // these sections into the remote target. - SmallVector<Allocation, 2> UnmappedSections; - - // This map tracks the sections we have remapped for the remote target - // but have not yet copied to the target. - DenseMap<uint64_t, Allocation> MappedSections; - - // FIXME: This is part of a work around to keep sections near one another - // when MCJIT performs relocations after code emission but before - // the generated code is moved to the remote target. - sys::MemoryBlock Near; - sys::MemoryBlock allocateSection(uintptr_t Size); - - RemoteTarget *Target; - -public: - RemoteMemoryManager() : Target(nullptr) {} - ~RemoteMemoryManager() override; - - uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, - StringRef SectionName) override; - - uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, StringRef SectionName, - bool IsReadOnly) override; - - // For now, remote symbol resolution is not support in lli. The MCJIT - // interface does support this, but clients must provide their own - // mechanism for finding remote symbol addresses. MCJIT will resolve - // symbols from Modules it contains. - uint64_t getSymbolAddress(const std::string &Name) override { return 0; } - - void notifyObjectLoaded(ExecutionEngine *EE, - const object::ObjectFile &Obj) override; - - bool finalizeMemory(std::string *ErrMsg) override; - - // For now, remote EH frame registration isn't supported. Remote symbol - // resolution is a prerequisite to supporting remote EH frame registration. 
- void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override {} - void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override {} - - // This is a non-interface function used by lli - void setRemoteTarget(RemoteTarget *T) { Target = T; } -}; - -} // end namespace llvm - -#endif diff --git a/contrib/llvm/tools/lli/RemoteTarget.cpp b/contrib/llvm/tools/lli/RemoteTarget.cpp deleted file mode 100644 index 95e1511..0000000 --- a/contrib/llvm/tools/lli/RemoteTarget.cpp +++ /dev/null @@ -1,71 +0,0 @@ -//===- RemoteTarget.cpp - LLVM Remote process JIT execution -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implementation of the RemoteTarget class which executes JITed code in a -// separate address range from where it was built. -// -//===----------------------------------------------------------------------===// - -#include "RemoteTarget.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Support/Memory.h" -#include <stdlib.h> -#include <string> - -using namespace llvm; - -//////////////////////////////////////////////////////////////////////////////// -// Simulated remote execution -// -// This implementation will simply move generated code and data to a new memory -// location in the current executable and let it run from there. -//////////////////////////////////////////////////////////////////////////////// - -bool RemoteTarget::allocateSpace(size_t Size, unsigned Alignment, - uint64_t &Address) { - sys::MemoryBlock *Prev = Allocations.size() ? &Allocations.back() : nullptr; - sys::MemoryBlock Mem = sys::Memory::AllocateRWX(Size, Prev, &ErrorMsg); - if (Mem.base() == nullptr) - return false; - if ((uintptr_t)Mem.base() % Alignment) { - ErrorMsg = "unable to allocate sufficiently aligned memory"; - return false; - } - Address = reinterpret_cast<uint64_t>(Mem.base()); - Allocations.push_back(Mem); - return true; -} - -bool RemoteTarget::loadData(uint64_t Address, const void *Data, size_t Size) { - memcpy ((void*)Address, Data, Size); - return true; -} - -bool RemoteTarget::loadCode(uint64_t Address, const void *Data, size_t Size) { - memcpy ((void*)Address, Data, Size); - sys::MemoryBlock Mem((void*)Address, Size); - sys::Memory::setExecutable(Mem, &ErrorMsg); - return true; -} - -bool RemoteTarget::executeCode(uint64_t Address, int &RetVal) { - int (*fn)() = (int(*)())Address; - RetVal = fn(); - return true; -} - -bool RemoteTarget::create() { - return true; -} - -void RemoteTarget::stop() { - for (unsigned i = 0, e = Allocations.size(); i != e; ++i) - sys::Memory::ReleaseRWX(Allocations[i]); -} diff --git a/contrib/llvm/tools/lli/RemoteTarget.h b/contrib/llvm/tools/lli/RemoteTarget.h deleted file mode 100644 index ee758a2..0000000 --- a/contrib/llvm/tools/lli/RemoteTarget.h +++ /dev/null @@ -1,122 +0,0 @@ -//===- RemoteTarget.h - LLVM Remote process JIT execution ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Definition of the RemoteTarget class which executes JITed code in a -// separate address range from where it was built. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLI_REMOTETARGET_H -#define LLVM_TOOLS_LLI_REMOTETARGET_H - -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Support/Memory.h" -#include <stdlib.h> -#include <string> - -namespace llvm { - -class RemoteTarget { - bool IsRunning; - - typedef SmallVector<sys::MemoryBlock, 16> AllocMapType; - AllocMapType Allocations; - -protected: - std::string ErrorMsg; - -public: - StringRef getErrorMsg() const { return ErrorMsg; } - - /// Allocate space in the remote target address space. - /// - /// @param Size Amount of space, in bytes, to allocate. - /// @param Alignment Required minimum alignment for allocated space. - /// @param[out] Address Remote address of the allocated memory. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - virtual bool allocateSpace(size_t Size, - unsigned Alignment, - uint64_t &Address); - - bool isAllocatedMemory(uint64_t Address, uint32_t Size) { - uint64_t AddressEnd = Address + Size; - for (AllocMapType::const_iterator I = Allocations.begin(), - E = Allocations.end(); - I != E; ++I) { - if (Address >= (uint64_t)I->base() && - AddressEnd <= (uint64_t)I->base() + I->size()) - return true; - } - return false; - } - - /// Load data into the target address space. - /// - /// @param Address Destination address in the target process. - /// @param Data Source address in the host process. - /// @param Size Number of bytes to copy. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - virtual bool loadData(uint64_t Address, - const void *Data, - size_t Size); - - /// Load code into the target address space and prepare it for execution. - /// - /// @param Address Destination address in the target process. - /// @param Data Source address in the host process. - /// @param Size Number of bytes to copy. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - virtual bool loadCode(uint64_t Address, - const void *Data, - size_t Size); - - /// Execute code in the target process. The called function is required - /// to be of signature int "(*)(void)". - /// - /// @param Address Address of the loaded function in the target - /// process. - /// @param[out] RetVal The integer return value of the called function. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - virtual bool executeCode(uint64_t Address, - int &RetVal); - - /// Minimum alignment for memory permissions. Used to separate code and - /// data regions to make sure data doesn't get marked as code or vice - /// versa. - /// - /// @returns Page alignment return value. Default of 4k. - virtual unsigned getPageAlignment() { return 4096; } - - /// Start the remote process. - virtual bool create(); - - /// Terminate the remote process. - virtual void stop(); - - RemoteTarget() : IsRunning(false), ErrorMsg("") {} - virtual ~RemoteTarget() { if (IsRunning) stop(); } -private: - // Main processing function for the remote target process. Command messages - // are received on file descriptor CmdFD and responses come back on OutFD. 
- static void doRemoteTargeting(int CmdFD, int OutFD); -}; - -} // end namespace llvm - -#endif diff --git a/contrib/llvm/tools/lli/RemoteTargetExternal.cpp b/contrib/llvm/tools/lli/RemoteTargetExternal.cpp deleted file mode 100644 index fe46248..0000000 --- a/contrib/llvm/tools/lli/RemoteTargetExternal.cpp +++ /dev/null @@ -1,327 +0,0 @@ -//===---- RemoteTargetExternal.cpp - LLVM out-of-process JIT execution ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implementation of the RemoteTargetExternal class which executes JITed code -// in a separate process from where it was built. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Config/config.h" -#include "RemoteTarget.h" -#include "RemoteTargetExternal.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/Program.h" -#include "llvm/Support/raw_ostream.h" -#include <string> - -using namespace llvm; - -#define DEBUG_TYPE "lli" - -bool RemoteTargetExternal::allocateSpace(size_t Size, unsigned Alignment, - uint64_t &Address) { - DEBUG(dbgs() << "Message [allocate space] size: " << Size << - ", align: " << Alignment << "\n"); - if (!SendAllocateSpace(Alignment, Size)) { - ErrorMsg += ", (RemoteTargetExternal::allocateSpace)"; - return false; - } - if (!Receive(LLI_AllocationResult, Address)) { - ErrorMsg += ", (RemoteTargetExternal::allocateSpace)"; - return false; - } - if (Address == 0) { - ErrorMsg += "failed allocation, (RemoteTargetExternal::allocateSpace)"; - return false; - } - DEBUG(dbgs() << "Message [allocate space] addr: 0x" << - format("%llx", Address) << "\n"); - return true; -} - -bool RemoteTargetExternal::loadData(uint64_t Address, const void *Data, size_t Size) { - DEBUG(dbgs() << "Message [load data] addr: 0x" << format("%llx", Address) << - ", size: " << Size << "\n"); - if (!SendLoadSection(Address, Data, (uint32_t)Size, false)) { - ErrorMsg += ", (RemoteTargetExternal::loadData)"; - return false; - } - int Status = LLI_Status_Success; - if (!Receive(LLI_LoadResult, Status)) { - ErrorMsg += ", (RemoteTargetExternal::loadData)"; - return false; - } - if (Status == LLI_Status_IncompleteMsg) { - ErrorMsg += "incomplete load data, (RemoteTargetExternal::loadData)"; - return false; - } - if (Status == LLI_Status_NotAllocated) { - ErrorMsg += "data memory not allocated, (RemoteTargetExternal::loadData)"; - return false; - } - DEBUG(dbgs() << "Message [load data] complete\n"); - return true; -} - -bool RemoteTargetExternal::loadCode(uint64_t Address, const void *Data, size_t Size) { - DEBUG(dbgs() << "Message [load code] addr: 0x" << format("%llx", Address) << - ", size: " << Size << "\n"); - if (!SendLoadSection(Address, Data, (uint32_t)Size, true)) { - ErrorMsg += ", (RemoteTargetExternal::loadCode)"; - return false; - } - int Status = LLI_Status_Success; - if (!Receive(LLI_LoadResult, Status)) { - ErrorMsg += ", (RemoteTargetExternal::loadCode)"; - return false; - } - if (Status == LLI_Status_IncompleteMsg) { - ErrorMsg += "incomplete load data, (RemoteTargetExternal::loadData)"; - return false; - } - if (Status == LLI_Status_NotAllocated) { - ErrorMsg += "data memory not allocated, (RemoteTargetExternal::loadData)"; - 
return false; - } - DEBUG(dbgs() << "Message [load code] complete\n"); - return true; -} - -bool RemoteTargetExternal::executeCode(uint64_t Address, int32_t &RetVal) { - DEBUG(dbgs() << "Message [exectue code] addr: " << Address << "\n"); - if (!SendExecute(Address)) { - ErrorMsg += ", (RemoteTargetExternal::executeCode)"; - return false; - } - if (!Receive(LLI_ExecutionResult, RetVal)) { - ErrorMsg += ", (RemoteTargetExternal::executeCode)"; - return false; - } - DEBUG(dbgs() << "Message [exectue code] return: " << RetVal << "\n"); - return true; -} - -void RemoteTargetExternal::stop() { - SendTerminate(); - RPC.Wait(); -} - -bool RemoteTargetExternal::SendAllocateSpace(uint32_t Alignment, uint32_t Size) { - if (!SendHeader(LLI_AllocateSpace)) { - ErrorMsg += ", (RemoteTargetExternal::SendAllocateSpace)"; - return false; - } - - AppendWrite((const void *)&Alignment, 4); - AppendWrite((const void *)&Size, 4); - - if (!SendPayload()) { - ErrorMsg += ", (RemoteTargetExternal::SendAllocateSpace)"; - return false; - } - return true; -} - -bool RemoteTargetExternal::SendLoadSection(uint64_t Addr, - const void *Data, - uint32_t Size, - bool IsCode) { - LLIMessageType MsgType = IsCode ? LLI_LoadCodeSection : LLI_LoadDataSection; - if (!SendHeader(MsgType)) { - ErrorMsg += ", (RemoteTargetExternal::SendLoadSection)"; - return false; - } - - AppendWrite((const void *)&Addr, 8); - AppendWrite(Data, Size); - - if (!SendPayload()) { - ErrorMsg += ", (RemoteTargetExternal::SendLoadSection)"; - return false; - } - return true; -} - -bool RemoteTargetExternal::SendExecute(uint64_t Addr) { - if (!SendHeader(LLI_Execute)) { - ErrorMsg += ", (RemoteTargetExternal::SendExecute)"; - return false; - } - - AppendWrite((const void *)&Addr, 8); - - if (!SendPayload()) { - ErrorMsg += ", (RemoteTargetExternal::SendExecute)"; - return false; - } - return true; -} - -bool RemoteTargetExternal::SendTerminate() { - return SendHeader(LLI_Terminate); - // No data or data size is sent with Terminate -} - -bool RemoteTargetExternal::Receive(LLIMessageType Msg) { - if (!ReceiveHeader(Msg)) - return false; - int Unused; - AppendRead(&Unused, 0); - if (!ReceivePayload()) - return false; - ReceiveData.clear(); - Sizes.clear(); - return true; -} - -bool RemoteTargetExternal::Receive(LLIMessageType Msg, int32_t &Data) { - if (!ReceiveHeader(Msg)) - return false; - AppendRead(&Data, 4); - if (!ReceivePayload()) - return false; - ReceiveData.clear(); - Sizes.clear(); - return true; -} - -bool RemoteTargetExternal::Receive(LLIMessageType Msg, uint64_t &Data) { - if (!ReceiveHeader(Msg)) - return false; - AppendRead(&Data, 8); - if (!ReceivePayload()) - return false; - ReceiveData.clear(); - Sizes.clear(); - return true; -} - -bool RemoteTargetExternal::ReceiveHeader(LLIMessageType ExpectedMsgType) { - assert(ReceiveData.empty() && Sizes.empty() && - "Payload vector not empty to receive header"); - - // Message header, with type to follow - uint32_t MsgType; - if (!ReadBytes(&MsgType, 4)) { - ErrorMsg += ", (RemoteTargetExternal::ReceiveHeader)"; - return false; - } - if (MsgType != (uint32_t)ExpectedMsgType) { - ErrorMsg = "received unexpected message type"; - ErrorMsg += ". 
Expecting: "; - ErrorMsg += ExpectedMsgType; - ErrorMsg += ", Got: "; - ErrorMsg += MsgType; - return false; - } - return true; -} - -bool RemoteTargetExternal::ReceivePayload() { - assert(!ReceiveData.empty() && - "Payload vector empty to receive"); - assert(ReceiveData.size() == Sizes.size() && - "Unexpected mismatch between data and size"); - - uint32_t TotalSize = 0; - for (int I=0, E=Sizes.size(); I < E; I++) - TotalSize += Sizes[I]; - - // Payload size header - uint32_t DataSize; - if (!ReadBytes(&DataSize, 4)) { - ErrorMsg += ", invalid data size"; - return false; - } - if (DataSize != TotalSize) { - ErrorMsg = "unexpected data size"; - ErrorMsg += ". Expecting: "; - ErrorMsg += TotalSize; - ErrorMsg += ", Got: "; - ErrorMsg += DataSize; - return false; - } - if (DataSize == 0) - return true; - - // Payload itself - for (int I=0, E=Sizes.size(); I < E; I++) { - if (!ReadBytes(ReceiveData[I], Sizes[I])) { - ErrorMsg = "unexpected data while reading message"; - return false; - } - } - - return true; -} - -bool RemoteTargetExternal::SendHeader(LLIMessageType MsgType) { - assert(SendData.empty() && Sizes.empty() && - "Payload vector not empty to send header"); - - // Message header, with type to follow - if (!WriteBytes(&MsgType, 4)) { - ErrorMsg += ", (RemoteTargetExternal::SendHeader)"; - return false; - } - return true; -} - -bool RemoteTargetExternal::SendPayload() { - assert(!SendData.empty() && !Sizes.empty() && - "Payload vector empty to send"); - assert(SendData.size() == Sizes.size() && - "Unexpected mismatch between data and size"); - - uint32_t TotalSize = 0; - for (int I=0, E=Sizes.size(); I < E; I++) - TotalSize += Sizes[I]; - - // Payload size header - if (!WriteBytes(&TotalSize, 4)) { - ErrorMsg += ", invalid data size"; - return false; - } - if (TotalSize == 0) - return true; - - // Payload itself - for (int I=0, E=Sizes.size(); I < E; I++) { - if (!WriteBytes(SendData[I], Sizes[I])) { - ErrorMsg = "unexpected data while writing message"; - return false; - } - } - - SendData.clear(); - Sizes.clear(); - return true; -} - -void RemoteTargetExternal::AppendWrite(const void *Data, uint32_t Size) { - SendData.push_back(Data); - Sizes.push_back(Size); -} - -void RemoteTargetExternal::AppendRead(void *Data, uint32_t Size) { - ReceiveData.push_back(Data); - Sizes.push_back(Size); -} - -#ifdef LLVM_ON_UNIX -#include "Unix/RPCChannel.inc" -#endif - -#ifdef LLVM_ON_WIN32 -#include "Windows/RPCChannel.inc" -#endif diff --git a/contrib/llvm/tools/lli/RemoteTargetExternal.h b/contrib/llvm/tools/lli/RemoteTargetExternal.h deleted file mode 100644 index afe8570..0000000 --- a/contrib/llvm/tools/lli/RemoteTargetExternal.h +++ /dev/null @@ -1,143 +0,0 @@ -//===----- RemoteTargetExternal.h - LLVM out-of-process JIT execution -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Definition of the RemoteTargetExternal class which executes JITed code in a -// separate process from where it was built. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLI_REMOTETARGETEXTERNAL_H -#define LLVM_TOOLS_LLI_REMOTETARGETEXTERNAL_H - -#include "RPCChannel.h" -#include "RemoteTarget.h" -#include "RemoteTargetMessage.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Config/config.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Support/Memory.h" -#include <stdlib.h> -#include <string> - -namespace llvm { - -class RemoteTargetExternal : public RemoteTarget { - RPCChannel RPC; - - bool WriteBytes(const void *Data, size_t Size) { - return RPC.WriteBytes(Data, Size); - } - - bool ReadBytes(void *Data, size_t Size) { return RPC.ReadBytes(Data, Size); } - -public: - /// Allocate space in the remote target address space. - /// - /// @param Size Amount of space, in bytes, to allocate. - /// @param Alignment Required minimum alignment for allocated space. - /// @param[out] Address Remote address of the allocated memory. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - bool allocateSpace(size_t Size, unsigned Alignment, - uint64_t &Address) override; - - /// Load data into the target address space. - /// - /// @param Address Destination address in the target process. - /// @param Data Source address in the host process. - /// @param Size Number of bytes to copy. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - bool loadData(uint64_t Address, const void *Data, size_t Size) override; - - /// Load code into the target address space and prepare it for execution. - /// - /// @param Address Destination address in the target process. - /// @param Data Source address in the host process. - /// @param Size Number of bytes to copy. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - bool loadCode(uint64_t Address, const void *Data, size_t Size) override; - - /// Execute code in the target process. The called function is required - /// to be of signature int "(*)(void)". - /// - /// @param Address Address of the loaded function in the target - /// process. - /// @param[out] RetVal The integer return value of the called function. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - bool executeCode(uint64_t Address, int &RetVal) override; - - /// Minimum alignment for memory permissions. Used to separate code and - /// data regions to make sure data doesn't get marked as code or vice - /// versa. - /// - /// @returns Page alignment return value. Default of 4k. - unsigned getPageAlignment() override { return 4096; } - - bool create() override { - RPC.ChildName = ChildName; - if (!RPC.createServer()) - return true; - - // We must get Ack from the client (blocking read) - if (!Receive(LLI_ChildActive)) { - ErrorMsg += ", (RPCChannel::create) - Stopping process!"; - stop(); - return false; - } - - return true; - } - - /// Terminate the remote process. 
- void stop() override; - - RemoteTargetExternal(std::string &Name) : RemoteTarget(), ChildName(Name) {} - ~RemoteTargetExternal() override {} - -private: - std::string ChildName; - - bool SendAllocateSpace(uint32_t Alignment, uint32_t Size); - bool SendLoadSection(uint64_t Addr, - const void *Data, - uint32_t Size, - bool IsCode); - bool SendExecute(uint64_t Addr); - bool SendTerminate(); - - // High-level wrappers for receiving data - bool Receive(LLIMessageType Msg); - bool Receive(LLIMessageType Msg, int32_t &Data); - bool Receive(LLIMessageType Msg, uint64_t &Data); - - // Lower level target-independent read/write to deal with errors - bool ReceiveHeader(LLIMessageType Msg); - bool ReceivePayload(); - bool SendHeader(LLIMessageType Msg); - bool SendPayload(); - - // Functions to append/retrieve data from the payload - SmallVector<const void *, 2> SendData; - SmallVector<void *, 1> ReceiveData; // Future proof - SmallVector<int, 2> Sizes; - void AppendWrite(const void *Data, uint32_t Size); - void AppendRead(void *Data, uint32_t Size); -}; - -} // end namespace llvm - -#endif diff --git a/contrib/llvm/tools/lli/RemoteTargetMessage.h b/contrib/llvm/tools/lli/RemoteTargetMessage.h deleted file mode 100644 index c210e4b..0000000 --- a/contrib/llvm/tools/lli/RemoteTargetMessage.h +++ /dev/null @@ -1,85 +0,0 @@ -//===---- RemoteTargetMessage.h - LLI out-of-process message protocol -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Definition of the LLIMessageType enum which is used for communication with a -// child process for remote execution. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLI_REMOTETARGETMESSAGE_H -#define LLVM_TOOLS_LLI_REMOTETARGETMESSAGE_H - -namespace llvm { - -// LLI messages from parent-to-child or vice versa follow an exceedingly simple -// protocol where the first four bytes represent the message type, the next -// four bytes represent the size of data for the command and following bytes -// represent the actual data. -// -// The protocol is not intended to be robust, secure or fault-tolerant. It is -// only here for testing purposes and is therefore intended to be the simplest -// implementation that will work. It is assumed that the parent and child -// share characteristics like endianness. -// -// Quick description of the protocol: -// -// { Header + Payload Size + Payload } -// -// The protocol message consist of a header, the payload size (which can be -// zero), and the payload itself. The payload can contain any number of items, -// and the size has to be the sum of them all. Each end is responsible for -// reading/writing the correct number of items with the correct sizes. 
-// -// The current four known exchanges are: -// -// * Allocate Space: -// Parent: { LLI_AllocateSpace, 8, Alignment, Size } -// Child: { LLI_AllocationResult, 8, Address } -// -// * Load Data: -// Parent: { LLI_LoadDataSection, 8+Size, Address, Data } -// Child: { LLI_LoadComplete, 4, StatusCode } -// -// * Load Code: -// Parent: { LLI_LoadCodeSection, 8+Size, Address, Code } -// Child: { LLI_LoadComplete, 4, StatusCode } -// -// * Execute Code: -// Parent: { LLI_Execute, 8, Address } -// Child: { LLI_ExecutionResult, 4, Result } -// -// It is the responsibility of either side to check for correct headers, -// sizes and payloads, since any inconsistency would misalign the pipe, and -// result in data corruption. - -enum LLIMessageType { - LLI_Error = -1, - LLI_ChildActive = 0, // Data = not used - LLI_AllocateSpace, // Data = struct { uint32_t Align, uint_32t Size } - LLI_AllocationResult, // Data = uint64_t Address (child memory space) - - LLI_LoadCodeSection, // Data = uint64_t Address, void * SectionData - LLI_LoadDataSection, // Data = uint64_t Address, void * SectionData - LLI_LoadResult, // Data = uint32_t LLIMessageStatus - - LLI_Execute, // Data = uint64_t Address - LLI_ExecutionResult, // Data = uint32_t Result - - LLI_Terminate // Data = not used -}; - -enum LLIMessageStatus { - LLI_Status_Success = 0, // Operation succeeded - LLI_Status_NotAllocated, // Address+Size not allocated in child space - LLI_Status_IncompleteMsg // Size received doesn't match request -}; - -} // end namespace llvm - -#endif diff --git a/contrib/llvm/tools/lli/Unix/RPCChannel.inc b/contrib/llvm/tools/lli/Unix/RPCChannel.inc deleted file mode 100644 index 6a9ae14..0000000 --- a/contrib/llvm/tools/lli/Unix/RPCChannel.inc +++ /dev/null @@ -1,122 +0,0 @@ -//=- RPCChannel.inc - LLVM out-of-process JIT execution for Unix --=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implementation of the Unix-specific parts of the RPCChannel class -// which executes JITed code in a separate process from where it was built. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/Errno.h" -#include "llvm/Support/raw_ostream.h" -#include <stdio.h> -#include <stdlib.h> -#include <sys/wait.h> -#include <unistd.h> - -namespace { - -struct ConnectionData_t { - int InputPipe; - int OutputPipe; - - ConnectionData_t(int in, int out) : InputPipe(in), OutputPipe(out) {} -}; - -} // namespace - -namespace llvm { - -bool RPCChannel::createServer() { - int PipeFD[2][2]; - pid_t ChildPID; - - // Create two pipes. - if (pipe(PipeFD[0]) != 0 || pipe(PipeFD[1]) != 0) - perror("Error creating pipe: "); - - ChildPID = fork(); - - if (ChildPID == 0) { - // In the child... - - // Close the parent ends of the pipes - close(PipeFD[0][1]); - close(PipeFD[1][0]); - - // Use our pipes as stdin and stdout - if (PipeFD[0][0] != STDIN_FILENO) { - dup2(PipeFD[0][0], STDIN_FILENO); - close(PipeFD[0][0]); - } - if (PipeFD[1][1] != STDOUT_FILENO) { - dup2(PipeFD[1][1], STDOUT_FILENO); - close(PipeFD[1][1]); - } - - // Execute the child process. - char *args[1] = { nullptr }; - int rc = execv(ChildName.c_str(), args); - if (rc != 0) - perror("Error executing child process: "); - } else { - // In the parent... 
- - // Close the child ends of the pipes - close(PipeFD[0][0]); - close(PipeFD[1][1]); - - // Store the parent ends of the pipes - ConnectionData = (void *)new ConnectionData_t(PipeFD[1][0], PipeFD[0][1]); - return true; - } - return false; -} - -bool RPCChannel::createClient() { - // Store the parent ends of the pipes - ConnectionData = (void *)new ConnectionData_t(STDIN_FILENO, STDOUT_FILENO); - return true; -} - -void RPCChannel::Wait() { wait(nullptr); } - -static bool CheckError(int rc, size_t Size, const char *Desc) { - if (rc < 0) { - llvm::errs() << "IO Error: " << Desc << ": " << sys::StrError() << '\n'; - return false; - } else if ((size_t)rc != Size) { - std::string ErrorMsg; - char Number[10] = { 0 }; - ErrorMsg += "Expecting "; - sprintf(Number, "%d", (uint32_t)Size); - ErrorMsg += Number; - ErrorMsg += " bytes, Got "; - sprintf(Number, "%d", rc); - ErrorMsg += Number; - llvm::errs() << "RPC Error: " << Desc << ": " << ErrorMsg << '\n'; - return false; - } - return true; -} - -bool RPCChannel::WriteBytes(const void *Data, size_t Size) { - int rc = write(((ConnectionData_t *)ConnectionData)->OutputPipe, Data, Size); - return CheckError(rc, Size, "WriteBytes"); -} - -bool RPCChannel::ReadBytes(void *Data, size_t Size) { - int rc = read(((ConnectionData_t *)ConnectionData)->InputPipe, Data, Size); - return CheckError(rc, Size, "ReadBytes"); -} - -RPCChannel::~RPCChannel() { - delete static_cast<ConnectionData_t *>(ConnectionData); -} - -} // namespace llvm diff --git a/contrib/llvm/tools/lli/Windows/RPCChannel.inc b/contrib/llvm/tools/lli/Windows/RPCChannel.inc deleted file mode 100644 index 82f2acb..0000000 --- a/contrib/llvm/tools/lli/Windows/RPCChannel.inc +++ /dev/null @@ -1,29 +0,0 @@ -//=- RPCChannel.inc - LLVM out-of-process JIT execution for Windows --=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implementation of the Windows-specific parts of the RPCChannel class -// which executes JITed code in a separate process from where it was built. 
-//
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-
-bool RPCChannel::createServer() { return false; }
-
-bool RPCChannel::createClient() { return false; }
-
-bool RPCChannel::WriteBytes(const void *Data, size_t Size) { return false; }
-
-bool RPCChannel::ReadBytes(void *Data, size_t Size) { return false; }
-
-void RPCChannel::Wait() {}
-
-RPCChannel::~RPCChannel() {}
-
-} // namespace llvm
diff --git a/contrib/llvm/tools/lli/lli.cpp b/contrib/llvm/tools/lli/lli.cpp
index 9f71406..67e7cbd 100644
--- a/contrib/llvm/tools/lli/lli.cpp
+++ b/contrib/llvm/tools/lli/lli.cpp
@@ -13,11 +13,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/LLVMContext.h"
 #include "OrcLazyJIT.h"
-#include "RemoteMemoryManager.h"
-#include "RemoteTarget.h"
-#include "RemoteTargetExternal.h"
+#include "RemoteJITUtils.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/CodeGen/LinkAllCodegenComponents.h"
@@ -28,6 +26,7 @@
 #include "llvm/ExecutionEngine/ObjectCache.h"
 #include "llvm/ExecutionEngine/OrcMCJITReplacement.h"
 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
@@ -449,7 +448,7 @@ int main(int argc, char **argv, char * const *envp) {
   RTDyldMemoryManager *RTDyldMM = nullptr;
   if (!ForceInterpreter) {
     if (RemoteMCJIT)
-      RTDyldMM = new RemoteMemoryManager();
+      RTDyldMM = new ForwardingMemoryManager();
     else
       RTDyldMM = new SectionMemoryManager();
 
@@ -582,6 +581,25 @@ int main(int argc, char **argv, char * const *envp) {
 
   int Result;
 
+  // Sanity check use of remote-jit: LLI currently only supports use of the
+  // remote JIT on Unix platforms.
+  if (RemoteMCJIT) {
+#ifndef LLVM_ON_UNIX
+    errs() << "Warning: host does not support external remote targets.\n"
+           << "  Defaulting to local execution\n";
+    return -1;
+#else
+    if (ChildExecPath.empty()) {
+      errs() << "-remote-mcjit requires -mcjit-remote-process.\n";
+      exit(1);
+    } else if (!sys::fs::can_execute(ChildExecPath)) {
+      errs() << "Unable to find usable child executable: '" << ChildExecPath
+             << "'\n";
+      return -1;
+    }
+#endif
+  }
+
   if (!RemoteMCJIT) {
     // If the program doesn't explicitly call exit, we will need the Exit
     // function later on to make an explicit call, so get the function now.
@@ -629,66 +647,123 @@ int main(int argc, char **argv, char * const *envp) {
     // Remote target MCJIT doesn't (yet) support static constructors. No reason
     // it couldn't. This is a limitation of the LLI implementation, not the
     // MCJIT itself. FIXME.
-    //
-    RemoteMemoryManager *MM = static_cast<RemoteMemoryManager*>(RTDyldMM);
-    // Everything is prepared now, so lay out our program for the target
-    // address space, assign the section addresses to resolve any relocations,
-    // and send it to the target.
-
-    std::unique_ptr<RemoteTarget> Target;
-    if (!ChildExecPath.empty()) { // Remote execution on a child process
-#ifndef LLVM_ON_UNIX
-      // FIXME: Remove this pointless fallback mode which causes tests to "pass"
-      // on platforms where they should XFAIL.
- errs() << "Warning: host does not support external remote targets.\n" - << " Defaulting to simulated remote execution\n"; - Target.reset(new RemoteTarget); -#else - if (!sys::fs::can_execute(ChildExecPath)) { - errs() << "Unable to find usable child executable: '" << ChildExecPath - << "'\n"; - return -1; - } - Target.reset(new RemoteTargetExternal(ChildExecPath)); -#endif - } else { - // No child process name provided, use simulated remote execution. - Target.reset(new RemoteTarget); + + // Lanch the remote process and get a channel to it. + std::unique_ptr<FDRPCChannel> C = launchRemote(); + if (!C) { + errs() << "Failed to launch remote JIT.\n"; + exit(1); } - // Give the memory manager a pointer to our remote target interface object. - MM->setRemoteTarget(Target.get()); + // Create a remote target client running over the channel. + typedef orc::remote::OrcRemoteTargetClient<orc::remote::RPCChannel> MyRemote; + ErrorOr<MyRemote> R = MyRemote::Create(*C); + if (!R) { + errs() << "Could not create remote: " << R.getError().message() << "\n"; + exit(1); + } - // Create the remote target. - if (!Target->create()) { - errs() << "ERROR: " << Target->getErrorMsg() << "\n"; - return EXIT_FAILURE; + // Create a remote memory manager. + std::unique_ptr<MyRemote::RCMemoryManager> RemoteMM; + if (auto EC = R->createRemoteMemoryManager(RemoteMM)) { + errs() << "Could not create remote memory manager: " << EC.message() << "\n"; + exit(1); } - // Since we're executing in a (at least simulated) remote address space, - // we can't use the ExecutionEngine::runFunctionAsMain(). We have to - // grab the function address directly here and tell the remote target - // to execute the function. - // - // Our memory manager will map generated code into the remote address - // space as it is loaded and copy the bits over during the finalizeMemory - // operation. - // + // Forward MCJIT's memory manager calls to the remote memory manager. + static_cast<ForwardingMemoryManager*>(RTDyldMM)->setMemMgr( + std::move(RemoteMM)); + + // Forward MCJIT's symbol resolution calls to the remote. + static_cast<ForwardingMemoryManager*>(RTDyldMM)->setResolver( + orc::createLambdaResolver( + [&](const std::string &Name) { + orc::TargetAddress Addr = 0; + if (auto EC = R->getSymbolAddress(Addr, Name)) { + errs() << "Failure during symbol lookup: " << EC.message() << "\n"; + exit(1); + } + return RuntimeDyld::SymbolInfo(Addr, JITSymbolFlags::Exported); + }, + [](const std::string &Name) { return nullptr; } + )); + + // Grab the target address of the JIT'd main function on the remote and call + // it. // FIXME: argv and envp handling. - uint64_t Entry = EE->getFunctionAddress(EntryFn->getName().str()); - + orc::TargetAddress Entry = EE->getFunctionAddress(EntryFn->getName().str()); + EE->finalizeObject(); DEBUG(dbgs() << "Executing '" << EntryFn->getName() << "' at 0x" << format("%llx", Entry) << "\n"); - - if (!Target->executeCode(Entry, Result)) - errs() << "ERROR: " << Target->getErrorMsg() << "\n"; + if (auto EC = R->callIntVoid(Result, Entry)) + errs() << "ERROR: " << EC.message() << "\n"; // Like static constructors, the remote target MCJIT support doesn't handle // this yet. It could. FIXME. - // Stop the remote target - Target->stop(); + // Delete the EE - we need to tear it down *before* we terminate the session + // with the remote, otherwise it'll crash when it tries to release resources + // on a remote that has already been disconnected. 
+ delete EE; + EE = nullptr; + + // Signal the remote target that we're done JITing. + R->terminateSession(); } return Result; } + +std::unique_ptr<FDRPCChannel> launchRemote() { +#ifndef LLVM_ON_UNIX + llvm_unreachable("launchRemote not supported on non-Unix platforms"); +#else + int PipeFD[2][2]; + pid_t ChildPID; + + // Create two pipes. + if (pipe(PipeFD[0]) != 0 || pipe(PipeFD[1]) != 0) + perror("Error creating pipe: "); + + ChildPID = fork(); + + if (ChildPID == 0) { + // In the child... + + // Close the parent ends of the pipes + close(PipeFD[0][1]); + close(PipeFD[1][0]); + + + // Execute the child process. + std::unique_ptr<char[]> ChildPath, ChildIn, ChildOut; + { + ChildPath.reset(new char[ChildExecPath.size() + 1]); + std::copy(ChildExecPath.begin(), ChildExecPath.end(), &ChildPath[0]); + ChildPath[ChildExecPath.size()] = '\0'; + std::string ChildInStr = std::to_string(PipeFD[0][0]); + ChildIn.reset(new char[ChildInStr.size() + 1]); + std::copy(ChildInStr.begin(), ChildInStr.end(), &ChildIn[0]); + ChildIn[ChildInStr.size()] = '\0'; + std::string ChildOutStr = std::to_string(PipeFD[1][1]); + ChildOut.reset(new char[ChildOutStr.size() + 1]); + std::copy(ChildOutStr.begin(), ChildOutStr.end(), &ChildOut[0]); + ChildOut[ChildOutStr.size()] = '\0'; + } + + char * const args[] = { &ChildPath[0], &ChildIn[0], &ChildOut[0], nullptr }; + int rc = execv(ChildExecPath.c_str(), args); + if (rc != 0) + perror("Error executing child process: "); + llvm_unreachable("Error executing child process"); + } + // else we're the parent... + + // Close the child ends of the pipes + close(PipeFD[0][0]); + close(PipeFD[1][1]); + + // Return an RPC channel connected to our end of the pipes. + return llvm::make_unique<FDRPCChannel>(PipeFD[1][0], PipeFD[0][1]); +#endif +} diff --git a/contrib/llvm/tools/llvm-lto/llvm-lto.cpp b/contrib/llvm/tools/llvm-lto/llvm-lto.cpp index 4bc6922..2320511 100644 --- a/contrib/llvm/tools/llvm-lto/llvm-lto.cpp +++ b/contrib/llvm/tools/llvm-lto/llvm-lto.cpp @@ -289,6 +289,7 @@ int main(int argc, char **argv) { CurrentActivity = "loading file '" + InputFilenames[i] + "'"; ErrorOr<std::unique_ptr<LTOModule>> ModuleOrErr = LTOModule::createFromFile(Context, InputFilenames[i].c_str(), Options); + error(ModuleOrErr, "error " + CurrentActivity); std::unique_ptr<LTOModule> &Module = *ModuleOrErr; CurrentActivity = ""; diff --git a/contrib/llvm/tools/llvm-objdump/COFFDump.cpp b/contrib/llvm/tools/llvm-objdump/COFFDump.cpp index f286351..5d21b33 100644 --- a/contrib/llvm/tools/llvm-objdump/COFFDump.cpp +++ b/contrib/llvm/tools/llvm-objdump/COFFDump.cpp @@ -358,13 +358,30 @@ static void printExportTable(const COFFObjectFile *Obj) { uint32_t RVA; if (I->getExportRVA(RVA)) return; - outs() << format(" % 4d %# 8x", Ordinal, RVA); + bool IsForwarder; + if (I->isForwarder(IsForwarder)) + return; + + if (IsForwarder) { + // Export table entries can be used to re-export symbols that + // this COFF file imports from other DLLs. This is rare. + // In most cases IsForwarder is false.
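The launchRemote function above hands the child its pipe ends by number: the descriptors survive exec and their integer values are passed as the first two program arguments. A sketch, with all details assumed rather than taken from the real child-target tool, of what the exec'd child does with those arguments:

#include <cstdio>
#include <cstdlib>

// Hypothetical child side of the launchRemote handshake: the parent leaves
// two pipe descriptors open across exec and passes their numbers as argv[1]
// and argv[2].
int main(int argc, char **argv) {
  if (argc < 3) {
    std::fprintf(stderr, "usage: %s <in-fd> <out-fd>\n", argv[0]);
    return 1;
  }
  int InFD = std::atoi(argv[1]);  // Read requests from the parent here.
  int OutFD = std::atoi(argv[2]); // Write replies to the parent here.
  // ... a real child target would now service RPC calls over InFD/OutFD ...
  (void)InFD;
  (void)OutFD;
  return 0;
}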
+ outs() << format(" % 4d ", Ordinal); + } else { + outs() << format(" % 4d %# 8x", Ordinal, RVA); + } StringRef Name; if (I->getSymbolName(Name)) continue; if (!Name.empty()) outs() << " " << Name; + if (IsForwarder) { + StringRef S; + if (I->getForwardTo(S)) + return; + outs() << " (forwarded to " << S << ")"; + } outs() << "\n"; } } diff --git a/contrib/llvm/tools/llvm-objdump/MachODump.cpp b/contrib/llvm/tools/llvm-objdump/MachODump.cpp index a2f3bc8..258c0b5 100644 --- a/contrib/llvm/tools/llvm-objdump/MachODump.cpp +++ b/contrib/llvm/tools/llvm-objdump/MachODump.cpp @@ -1196,7 +1196,11 @@ static void ProcessMachO(StringRef Filename, MachOObjectFile *MachOOF, PrintSymbolTable(MachOOF); if (UnwindInfo) printMachOUnwindInfo(MachOOF); - if (PrivateHeaders) + if (PrivateHeaders) { + printMachOFileHeader(MachOOF); + printMachOLoadCommands(MachOOF); + } + if (FirstPrivateHeader) printMachOFileHeader(MachOOF); if (ObjcMetaData) printObjcMetaData(MachOOF, !NonVerbose); @@ -1477,10 +1481,8 @@ void llvm::ParseInputMachO(StringRef Filename) { // Attempt to open the binary. ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(Filename); - if (std::error_code EC = BinaryOrErr.getError()) { - errs() << "llvm-objdump: '" << Filename << "': " << EC.message() << ".\n"; - return; - } + if (std::error_code EC = BinaryOrErr.getError()) + report_error(Filename, EC); Binary &Bin = *BinaryOrErr.get().getBinary(); if (Archive *A = dyn_cast<Archive>(&Bin)) { @@ -1649,8 +1651,9 @@ void llvm::ParseInputMachO(StringRef Filename) { } else errs() << "llvm-objdump: '" << Filename << "': " << "Object is not a Mach-O file type.\n"; - } else - report_error(Filename, object_error::invalid_file_type); + return; + } + llvm_unreachable("Input object can't be invalid at this point"); } typedef std::pair<uint64_t, const char *> BindInfoEntry; @@ -8646,31 +8649,40 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t filetype, } } -static void getAndPrintMachHeader(const MachOObjectFile *Obj, - uint32_t &filetype, uint32_t &cputype, - bool verbose) { +static void PrintMachHeader(const MachOObjectFile *Obj, bool verbose) { if (Obj->is64Bit()) { MachO::mach_header_64 H_64; H_64 = Obj->getHeader64(); PrintMachHeader(H_64.magic, H_64.cputype, H_64.cpusubtype, H_64.filetype, H_64.ncmds, H_64.sizeofcmds, H_64.flags, verbose); - filetype = H_64.filetype; - cputype = H_64.cputype; } else { MachO::mach_header H; H = Obj->getHeader(); PrintMachHeader(H.magic, H.cputype, H.cpusubtype, H.filetype, H.ncmds, H.sizeofcmds, H.flags, verbose); - filetype = H.filetype; - cputype = H.cputype; } } void llvm::printMachOFileHeader(const object::ObjectFile *Obj) { const MachOObjectFile *file = dyn_cast<const MachOObjectFile>(Obj); + PrintMachHeader(file, !NonVerbose); +} + +void llvm::printMachOLoadCommands(const object::ObjectFile *Obj) { + const MachOObjectFile *file = dyn_cast<const MachOObjectFile>(Obj); uint32_t filetype = 0; uint32_t cputype = 0; - getAndPrintMachHeader(file, filetype, cputype, !NonVerbose); + if (file->is64Bit()) { + MachO::mach_header_64 H_64; + H_64 = file->getHeader64(); + filetype = H_64.filetype; + cputype = H_64.cputype; + } else { + MachO::mach_header H; + H = file->getHeader(); + filetype = H.filetype; + cputype = H.cputype; + } PrintLoadCommands(file, filetype, cputype, !NonVerbose); } diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp index 22167c7..d5ae5de 100644 --- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ 
b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -165,6 +165,11 @@ cl::opt<bool> llvm::PrivateHeaders("private-headers", cl::desc("Display format specific file headers")); +cl::opt<bool> +llvm::FirstPrivateHeader("private-header", + cl::desc("Display only the first format specific file " + "header")); + static cl::alias PrivateHeadersShort("p", cl::desc("Alias for --private-headers"), cl::aliasopt(PrivateHeaders)); @@ -482,6 +487,23 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj, case ELF::EM_MIPS: res = Target; break; + case ELF::EM_WEBASSEMBLY: + switch (type) { + case ELF::R_WEBASSEMBLY_DATA: { + std::string fmtbuf; + raw_string_ostream fmt(fmtbuf); + fmt << Target << (addend < 0 ? "" : "+") << addend; + fmt.flush(); + Result.append(fmtbuf.begin(), fmtbuf.end()); + break; + } + case ELF::R_WEBASSEMBLY_FUNCTION: + res = Target; + break; + default: + res = "Unknown"; + } + break; default: res = "Unknown"; } @@ -1478,7 +1500,19 @@ static void printFaultMaps(const ObjectFile *Obj) { outs() << FMP; } -static void printPrivateFileHeader(const ObjectFile *o) { +static void printPrivateFileHeaders(const ObjectFile *o) { + if (o->isELF()) + printELFFileHeader(o); + else if (o->isCOFF()) + printCOFFFileHeader(o); + else if (o->isMachO()) { + printMachOFileHeader(o); + printMachOLoadCommands(o); + } else + report_fatal_error("Invalid/Unsupported object file format"); +} + +static void printFirstPrivateFileHeader(const ObjectFile *o) { if (o->isELF()) printELFFileHeader(o); else if (o->isCOFF()) @@ -1510,7 +1544,9 @@ static void DumpObject(const ObjectFile *o) { if (UnwindInfo) PrintUnwindInfo(o); if (PrivateHeaders) - printPrivateFileHeader(o); + printPrivateFileHeaders(o); + if (FirstPrivateHeader) + printFirstPrivateFileHeader(o); if (ExportsTrie) printExportsTrie(o); if (Rebase) @@ -1601,6 +1637,7 @@ int main(int argc, char **argv) { && !SymbolTable && !UnwindInfo && !PrivateHeaders + && !FirstPrivateHeader && !ExportsTrie && !Rebase && !Bind diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.h b/contrib/llvm/tools/llvm-objdump/llvm-objdump.h index 6e8ad6b..60cabbc 100644 --- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.h +++ b/contrib/llvm/tools/llvm-objdump/llvm-objdump.h @@ -31,6 +31,7 @@ extern cl::opt<bool> Disassemble; extern cl::opt<bool> DisassembleAll; extern cl::opt<bool> NoShowRawInsn; extern cl::opt<bool> PrivateHeaders; +extern cl::opt<bool> FirstPrivateHeader; extern cl::opt<bool> ExportsTrie; extern cl::opt<bool> Rebase; extern cl::opt<bool> Bind; @@ -70,6 +71,7 @@ void printELFFileHeader(const object::ObjectFile *o); void printCOFFFileHeader(const object::ObjectFile *o); void printCOFFSymbolTable(const object::COFFObjectFile *o); void printMachOFileHeader(const object::ObjectFile *o); +void printMachOLoadCommands(const object::ObjectFile *o); void printExportsTrie(const object::ObjectFile *o); void printRebaseTable(const object::ObjectFile *o); void printBindTable(const object::ObjectFile *o); diff --git a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp index 516d1cf..d44da0d 100644 --- a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -603,12 +603,14 @@ void COFFDumper::printCodeViewSection(const SectionRef &Section) { // in the line table. The filename string is accessed using double // indirection to the string table subsection using the index subsection. 
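The R_WEBASSEMBLY_DATA branch above prints the target symbol plus a signed addend; the conditional works because a negative value already prints its own minus sign, so only non-negative addends need an explicit '+'. A standalone sketch of the same formatting:

#include <iostream>
#include <sstream>
#include <string>

// Mirrors the addend formatting above: operator<< emits the '-' for
// negative values, so '+' is inserted only for non-negative ones.
static std::string formatReloc(const std::string &Target, long Addend) {
  std::ostringstream OS;
  OS << Target << (Addend < 0 ? "" : "+") << Addend;
  return OS.str();
}

int main() {
  std::cout << formatReloc("env.memcpy", 16) << "\n"; // env.memcpy+16
  std::cout << formatReloc("env.memcpy", -8) << "\n"; // env.memcpy-8
  return 0;
}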
uint32_t OffsetInIndex = DE.getU32(&Offset), - SegmentLength = DE.getU32(&Offset), + NumLines = DE.getU32(&Offset), FullSegmentSize = DE.getU32(&Offset); + uint32_t ColumnOffset = Offset + 8 * NumLines; + DataExtractor ColumnDE(DE.getData(), true, 4); + if (FullSegmentSize != - 12 + 8 * SegmentLength + - (HasColumnInformation ? 4 * SegmentLength : 0)) { + 12 + 8 * NumLines + (HasColumnInformation ? 4 * NumLines : 0)) { error(object_error::parse_failed); return; } @@ -635,29 +637,41 @@ void COFFDumper::printCodeViewSection(const SectionRef &Section) { StringRef Filename(CVStringTable.data() + FilenameOffset); ListScope S(W, "FilenameSegment"); W.printString("Filename", Filename); - for (unsigned J = 0; J != SegmentLength && DE.isValidOffset(Offset); - ++J) { + for (unsigned LineIdx = 0; + LineIdx != NumLines && DE.isValidOffset(Offset); ++LineIdx) { // Then come the (PC, LineNumber) pairs. The line number is stored in the // least significant 31 bits of the respective word in the table. - uint32_t PC = DE.getU32(&Offset), - LineNumber = DE.getU32(&Offset) & 0x7fffffff; + uint32_t PC = DE.getU32(&Offset), LineData = DE.getU32(&Offset); if (PC >= FunctionSize) { error(object_error::parse_failed); return; } char Buffer[32]; format("+0x%X", PC).snprint(Buffer, 32); - W.printNumber(Buffer, LineNumber); - } - if (HasColumnInformation) { - for (unsigned J = 0; J != SegmentLength && DE.isValidOffset(Offset); - ++J) { - uint16_t ColStart = DE.getU16(&Offset); + ListScope PCScope(W, Buffer); + uint32_t LineNumberStart = LineData & COFF::CVL_MaxLineNumber; + uint32_t LineNumberEndDelta = + (LineData >> COFF::CVL_LineNumberStartBits) & + COFF::CVL_LineNumberEndDeltaMask; + bool IsStatement = LineData & COFF::CVL_IsStatement; + W.printNumber("LineNumberStart", LineNumberStart); + W.printNumber("LineNumberEndDelta", LineNumberEndDelta); + W.printBoolean("IsStatement", IsStatement); + if (HasColumnInformation && + ColumnDE.isValidOffsetForDataOfSize(ColumnOffset, 4)) { + uint16_t ColStart = ColumnDE.getU16(&ColumnOffset); W.printNumber("ColStart", ColStart); - uint16_t ColEnd = DE.getU16(&Offset); + uint16_t ColEnd = ColumnDE.getU16(&ColumnOffset); W.printNumber("ColEnd", ColEnd); } } + // Skip over the column data.
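The line-table rewrite above unpacks each 32-bit line-data word into a start line, an end-line delta, and an is-statement flag. A worked decode, assuming the usual CodeView layout (24 bits start, 7 bits delta, 1 flag bit) behind the COFF::CVL_* constants:

#include <cstdint>
#include <cstdio>

int main() {
  // Assumed values of the COFF::CVL_* constants used above.
  const uint32_t CVL_MaxLineNumber = 0x00ffffff;    // Bits 0-23.
  const uint32_t CVL_LineNumberStartBits = 24;
  const uint32_t CVL_LineNumberEndDeltaMask = 0x7f; // Bits 24-30.
  const uint32_t CVL_IsStatement = 0x80000000;      // Bit 31.

  uint32_t LineData = 0x8100002a; // Statement at line 42, end delta 1.
  std::printf("start=%u delta=%u statement=%d\n",
              LineData & CVL_MaxLineNumber,
              (LineData >> CVL_LineNumberStartBits) &
                  CVL_LineNumberEndDeltaMask,
              (LineData & CVL_IsStatement) != 0);
  return 0;
}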
+ if (HasColumnInformation) { + for (unsigned LineIdx = 0; + LineIdx != NumLines && DE.isValidOffset(Offset); ++LineIdx) { + DE.getU32(&Offset); + } + } } } } diff --git a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp index 02397f3..be84f3c 100644 --- a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -708,7 +708,8 @@ static const EnumEntry<unsigned> ElfMachineType[] = { LLVM_READOBJ_ENUM_ENT(ELF, EM_VIDEOCORE5 ), LLVM_READOBJ_ENUM_ENT(ELF, EM_78KOR ), LLVM_READOBJ_ENUM_ENT(ELF, EM_56800EX ), - LLVM_READOBJ_ENUM_ENT(ELF, EM_AMDGPU ) + LLVM_READOBJ_ENUM_ENT(ELF, EM_AMDGPU ), + LLVM_READOBJ_ENUM_ENT(ELF, EM_WEBASSEMBLY ), }; static const EnumEntry<unsigned> ElfSymbolBindings[] = { diff --git a/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index e45660c..9503493 100644 --- a/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -82,6 +82,10 @@ static cl::opt<bool> ClPrettyPrint("pretty-print", cl::init(false), cl::desc("Make the output more human friendly")); +static cl::opt<int> ClPrintSourceContextLines( + "print-source-context-lines", cl::init(0), + cl::desc("Print N lines of source file context")); + static bool error(std::error_code ec) { if (!ec) return false; @@ -89,18 +93,14 @@ static bool error(std::error_code ec) { return true; } -static bool parseCommand(bool &IsData, std::string &ModuleName, - uint64_t &ModuleOffset) { +static bool parseCommand(StringRef InputString, bool &IsData, + std::string &ModuleName, uint64_t &ModuleOffset) { const char *kDataCmd = "DATA "; const char *kCodeCmd = "CODE "; - const int kMaxInputStringLength = 1024; - const char kDelimiters[] = " \n"; - char InputString[kMaxInputStringLength]; - if (!fgets(InputString, sizeof(InputString), stdin)) - return false; + const char kDelimiters[] = " \n\r"; IsData = false; ModuleName = ""; - char *pos = InputString; + const char *pos = InputString.data(); if (strncmp(pos, kDataCmd, strlen(kDataCmd)) == 0) { IsData = true; pos += strlen(kDataCmd); @@ -117,7 +117,7 @@ static bool parseCommand(bool &IsData, std::string &ModuleName, if (*pos == '"' || *pos == '\'') { char quote = *pos; pos++; - char *end = strchr(pos, quote); + const char *end = strchr(pos, quote); if (!end) return false; ModuleName = std::string(pos, end - pos); @@ -158,13 +158,25 @@ int main(int argc, char **argv) { } LLVMSymbolizer Symbolizer(Opts); - bool IsData = false; - std::string ModuleName; - uint64_t ModuleOffset; DIPrinter Printer(outs(), ClPrintFunctions != FunctionNameKind::None, - ClPrettyPrint); + ClPrettyPrint, ClPrintSourceContextLines); + + const int kMaxInputStringLength = 1024; + char InputString[kMaxInputStringLength]; + + while (true) { + if (!fgets(InputString, sizeof(InputString), stdin)) + break; + + bool IsData = false; + std::string ModuleName; + uint64_t ModuleOffset = 0; + if (!parseCommand(StringRef(InputString), IsData, ModuleName, + ModuleOffset)) { + outs() << InputString; + continue; + } - while (parseCommand(IsData, ModuleName, ModuleOffset)) { if (ClPrintAddress) { outs() << "0x"; outs().write_hex(ModuleOffset); diff --git a/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp b/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp index cc74f9e..cf7cbd9 100644 --- a/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp +++ b/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp @@ -65,7 +65,7 @@ private:
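The reworked parseCommand in the llvm-symbolizer hunk above now receives the already-read line and handles optionally quoted module names. A reduced sketch of that quoted-token handling, with hypothetical names:

#include <cstring>
#include <string>

// Illustrative: extract a module name that may be wrapped in matching
// single or double quotes, advancing Pos past the consumed token.
static bool takeModuleName(const char *&Pos, std::string &Name) {
  if (*Pos == '"' || *Pos == '\'') {
    char Quote = *Pos++;
    const char *End = std::strchr(Pos, Quote);
    if (!End)
      return false; // Unterminated quoted name.
    Name.assign(Pos, End - Pos);
    Pos = End + 1;
    return true;
  }
  size_t Len = std::strcspn(Pos, " \n\r"); // Same delimiters as above.
  Name.assign(Pos, Len);
  Pos += Len;
  return !Name.empty();
}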
static void PrintCases(std::vector<std::pair<std::string, AsmWriterOperand> > &OpsToPrint, raw_ostream &O) { - O << " case " << OpsToPrint.back().first << ": "; + O << " case " << OpsToPrint.back().first << ":"; AsmWriterOperand TheOp = OpsToPrint.back().second; OpsToPrint.pop_back(); @@ -73,13 +73,13 @@ static void PrintCases(std::vector<std::pair<std::string, // emit a case label for them. for (unsigned i = OpsToPrint.size(); i != 0; --i) if (OpsToPrint[i-1].second == TheOp) { - O << "\n case " << OpsToPrint[i-1].first << ": "; + O << "\n case " << OpsToPrint[i-1].first << ":"; OpsToPrint.erase(OpsToPrint.begin()+i-1); } // Finally, emit the code. - O << TheOp.getCode(); - O << "break;\n"; + O << "\n " << TheOp.getCode(); + O << "\n break;\n"; } @@ -109,9 +109,9 @@ static void EmitInstructions(std::vector<AsmWriterInst> &Insts, O << " case " << FirstInst.CGI->Namespace << "::" << FirstInst.CGI->TheDef->getName() << ":\n"; - for (unsigned i = 0, e = SimilarInsts.size(); i != e; ++i) - O << " case " << SimilarInsts[i].CGI->Namespace << "::" - << SimilarInsts[i].CGI->TheDef->getName() << ":\n"; + for (const AsmWriterInst &AWI : SimilarInsts) + O << " case " << AWI.CGI->Namespace << "::" + << AWI.CGI->TheDef->getName() << ":\n"; for (unsigned i = 0, e = FirstInst.Operands.size(); i != e; ++i) { if (i != DifferingOperand) { // If the operand is the same for all instructions, just print it. @@ -120,13 +120,13 @@ static void EmitInstructions(std::vector<AsmWriterInst> &Insts, // If this is the operand that varies between all of the instructions, // emit a switch for just this operand now. O << " switch (MI->getOpcode()) {\n"; + O << " default: llvm_unreachable(\"Unexpected opcode.\");\n"; std::vector<std::pair<std::string, AsmWriterOperand> > OpsToPrint; OpsToPrint.push_back(std::make_pair(FirstInst.CGI->Namespace + "::" + FirstInst.CGI->TheDef->getName(), FirstInst.Operands[i])); - for (unsigned si = 0, e = SimilarInsts.size(); si != e; ++si) { - AsmWriterInst &AWI = SimilarInsts[si]; + for (const AsmWriterInst &AWI : SimilarInsts) { OpsToPrint.push_back(std::make_pair(AWI.CGI->Namespace+"::"+ AWI.CGI->TheDef->getName(), AWI.Operands[i])); @@ -159,11 +159,10 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands, if (!Inst) continue; // PHI, INLINEASM, CFI_INSTRUCTION, etc. - std::string Command; if (Inst->Operands.empty()) continue; // Instruction already done. - Command = " " + Inst->Operands[0].getCode() + "\n"; + std::string Command = " " + Inst->Operands[0].getCode() + "\n"; // Check to see if we already have 'Command' in UniqueOperandCommands. // If not, add it. @@ -178,7 +177,7 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands, } if (!FoundIt) { InstIdxs[i] = UniqueOperandCommands.size(); - UniqueOperandCommands.push_back(Command); + UniqueOperandCommands.push_back(std::move(Command)); InstrsForCase.push_back(Inst->CGI->TheDef->getName()); // This command matches one operand so far. @@ -293,14 +292,13 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { /// OpcodeInfo - This encodes the index of the string to use for the first /// chunk of the output as well as indices used for operand printing. - /// To reduce the number of unhandled cases, we expand the size from 32-bit - /// to 32+16 = 48-bit. std::vector<uint64_t> OpcodeInfo; + const unsigned OpcodeInfoBits = 64; // Add all strings to the string table upfront so it can generate an optimized // representation. 
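FindUniqueOperandCommands above deduplicates the generated printer snippets: each instruction records the index of its command string, and identical commands share one index. The core pattern, reduced to a sketch with hypothetical names:

#include <string>
#include <vector>

// Illustrative interning helper: returns the index of Cmd in Unique, adding
// it only if it has not been seen before - the same idea InstIdxs relies on.
static unsigned internCommand(std::vector<std::string> &Unique,
                              const std::string &Cmd) {
  for (unsigned i = 0, e = Unique.size(); i != e; ++i)
    if (Unique[i] == Cmd)
      return i;
  Unique.push_back(Cmd);
  return Unique.size() - 1;
}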
- for (unsigned i = 0, e = NumberedInstructions->size(); i != e; ++i) { - AsmWriterInst *AWI = CGIAWIMap[NumberedInstructions->at(i)]; + for (const CodeGenInstruction *Inst : *NumberedInstructions) { + AsmWriterInst *AWI = CGIAWIMap[Inst]; if (AWI && AWI->Operands[0].OperandType == AsmWriterOperand::isLiteralTextOperand && @@ -314,8 +312,8 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { StringTable.layout(); unsigned MaxStringIdx = 0; - for (unsigned i = 0, e = NumberedInstructions->size(); i != e; ++i) { - AsmWriterInst *AWI = CGIAWIMap[NumberedInstructions->at(i)]; + for (const CodeGenInstruction *Inst : *NumberedInstructions) { + AsmWriterInst *AWI = CGIAWIMap[Inst]; unsigned Idx; if (!AWI) { // Something not handled by the asmwriter printer. @@ -344,7 +342,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { // To reduce code size, we compactify common instructions into a few bits // in the opcode-indexed table. - unsigned BitsLeft = 64-AsmStrBits; + unsigned BitsLeft = OpcodeInfoBits-AsmStrBits; std::vector<std::vector<std::string>> TableDrivenOperandPrinters; @@ -372,7 +370,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { // Otherwise, we can include this in the initial lookup table. Add it in. for (unsigned i = 0, e = InstIdxs.size(); i != e; ++i) if (InstIdxs[i] != ~0U) { - OpcodeInfo[i] |= (uint64_t)InstIdxs[i] << (64-BitsLeft); + OpcodeInfo[i] |= (uint64_t)InstIdxs[i] << (OpcodeInfoBits-BitsLeft); } BitsLeft -= NumBits; @@ -392,56 +390,55 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { TableDrivenOperandPrinters.push_back(std::move(UniqueOperandCommands)); } - - // We always emit at least one 32-bit table. A second table is emitted if - // more bits are needed. - O<<" static const uint32_t OpInfo[] = {\n"; - for (unsigned i = 0, e = NumberedInstructions->size(); i != e; ++i) { - O << " " << (OpcodeInfo[i] & 0xffffffff) << "U,\t// " - << NumberedInstructions->at(i)->TheDef->getName() << "\n"; - } - // Add a dummy entry so the array init doesn't end with a comma. - O << " 0U\n"; + // Emit the string table itself. + O << " static const char AsmStrs[] = {\n"; + StringTable.emit(O, printChar); O << " };\n\n"; - if (BitsLeft < 32) { - // Add a second OpInfo table only when it is necessary. - // Adjust the type of the second table based on the number of bits needed. - O << " static const uint" - << ((BitsLeft < 16) ? "32" : (BitsLeft < 24) ? "16" : "8") - << "_t OpInfo2[] = {\n"; + // Emit the lookup tables in pieces to minimize wasted bytes. + unsigned BytesNeeded = ((OpcodeInfoBits - BitsLeft) + 7) / 8; + unsigned Table = 0, Shift = 0; + SmallString<128> BitsString; + raw_svector_ostream BitsOS(BitsString); + // If the total bits is more than 32-bits we need to use a 64-bit type. + BitsOS << " uint" << ((BitsLeft < (OpcodeInfoBits - 32)) ? 64 : 32) + << "_t Bits = 0;\n"; + while (BytesNeeded != 0) { + // Figure out how big this table section needs to be, but no bigger than 4. 
+ unsigned TableSize = std::min(1 << Log2_32(BytesNeeded), 4); + BytesNeeded -= TableSize; + TableSize *= 8; // Convert to bits. + uint64_t Mask = (1ULL << TableSize) - 1; + O << " static const uint" << TableSize << "_t OpInfo" << Table + << "[] = {\n"; for (unsigned i = 0, e = NumberedInstructions->size(); i != e; ++i) { - O << " " << (OpcodeInfo[i] >> 32) << "U,\t// " + O << " " << ((OpcodeInfo[i] >> Shift) & Mask) << "U,\t// " << NumberedInstructions->at(i)->TheDef->getName() << "\n"; } - // Add a dummy entry so the array init doesn't end with a comma. - O << " 0U\n"; O << " };\n\n"; + // Emit string to combine the individual table lookups. + BitsOS << " Bits |= "; + // If the total bits is more than 32-bits we need to use a 64-bit type. + if (BitsLeft < (OpcodeInfoBits - 32)) + BitsOS << "(uint64_t)"; + BitsOS << "OpInfo" << Table << "[MI->getOpcode()] << " << Shift << ";\n"; + // Prepare the shift for the next iteration and increment the table count. + Shift += TableSize; + ++Table; } - // Emit the string itself. - O << " static const char AsmStrs[] = {\n"; - StringTable.emit(O, printChar); - O << " };\n\n"; - + // Emit the initial tab character. O << " O << \"\\t\";\n\n"; O << " // Emit the opcode for the instruction.\n"; - if (BitsLeft < 32) { - // If we have two tables then we need to perform two lookups and combine - // the results into a single 64-bit value. - O << " uint64_t Bits1 = OpInfo[MI->getOpcode()];\n" - << " uint64_t Bits2 = OpInfo2[MI->getOpcode()];\n" - << " uint64_t Bits = (Bits2 << 32) | Bits1;\n"; - } else { - // If only one table is used we just need to perform a single lookup. - O << " uint32_t Bits = OpInfo[MI->getOpcode()];\n"; - } + O << BitsString; + + // Emit the starting string. O << " assert(Bits != 0 && \"Cannot print this instruction.\");\n" << " O << AsmStrs+(Bits & " << (1 << AsmStrBits)-1 << ")-1;\n\n"; // Output the table driven operand information. - BitsLeft = 64-AsmStrBits; + BitsLeft = OpcodeInfoBits-AsmStrBits; for (unsigned i = 0, e = TableDrivenOperandPrinters.size(); i != e; ++i) { std::vector<std::string> &Commands = TableDrivenOperandPrinters[i]; @@ -457,7 +454,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { if (Commands.size() == 2) { // Emit two possibilities with if/else. O << " if ((Bits >> " - << (64-BitsLeft) << ") & " + << (OpcodeInfoBits-BitsLeft) << ") & " << ((1 << NumBits)-1) << ") {\n" << Commands[1] << " } else {\n" @@ -468,14 +465,14 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { O << Commands[0] << "\n\n"; } else { O << " switch ((Bits >> " - << (64-BitsLeft) << ") & " + << (OpcodeInfoBits-BitsLeft) << ") & " << ((1 << NumBits)-1) << ") {\n" << " default: llvm_unreachable(\"Invalid command number.\");\n"; // Print out all the cases. - for (unsigned i = 0, e = Commands.size(); i != e; ++i) { - O << " case " << i << ":\n"; - O << Commands[i]; + for (unsigned j = 0, e = Commands.size(); j != e; ++j) { + O << " case " << j << ":\n"; + O << Commands[j]; O << " break;\n"; } O << " }\n\n"; @@ -484,14 +481,11 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { // Okay, delete instructions with no operand info left. - for (unsigned i = 0, e = Instructions.size(); i != e; ++i) { - // Entire instruction has been emitted?
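The loop above emits as many 8/16/32-bit OpInfo tables as the used bits actually require, and the generated printer ORs them back together at increasing shifts. A model of that round trip for a hypothetical 40-bit encoding:

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical 40-bit OpcodeInfo entry: low 32 bits in one table, the
  // remaining 8 bits in a second, mirroring the emitted OpInfo0/OpInfo1.
  uint64_t OpcodeInfo = 0x9a12345678ULL;
  uint32_t OpInfo0 = static_cast<uint32_t>(OpcodeInfo & 0xffffffffULL);
  uint8_t OpInfo1 = static_cast<uint8_t>((OpcodeInfo >> 32) & 0xffULL);

  // The generated code does exactly this: Bits |= Table[Opcode] << Shift.
  uint64_t Bits = 0;
  Bits |= OpInfo0;
  Bits |= static_cast<uint64_t>(OpInfo1) << 32;

  std::printf("%s\n", Bits == OpcodeInfo ? "round-trips" : "mismatch");
  return 0;
}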
- AsmWriterInst &Inst = Instructions[i]; - if (Inst.Operands.empty()) { - Instructions.erase(Instructions.begin()+i); - --i; --e; - } - } + auto I = std::remove_if(Instructions.begin(), Instructions.end(), + [](AsmWriterInst &Inst) { + return Inst.Operands.empty(); + }); + Instructions.erase(I, Instructions.end()); // Because this is a vector, we want to emit from the end. Reverse all of the @@ -499,18 +493,18 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { std::reverse(Instructions.begin(), Instructions.end()); - // Now that we've emitted all of the operand info that fit into 32 bits, emit + // Now that we've emitted all of the operand info that fit into 64 bits, emit // information for those instructions that are left. This is a less dense - // encoding, but we expect the main 32-bit table to handle the majority of + // encoding, but we expect the main 64-bit table to handle the majority of // instructions. if (!Instructions.empty()) { // Find the opcode # of inline asm. O << " switch (MI->getOpcode()) {\n"; + O << " default: llvm_unreachable(\"Unexpected opcode.\");\n"; while (!Instructions.empty()) EmitInstructions(Instructions, O); O << " }\n"; - O << " return;\n"; } O << "}\n"; @@ -603,16 +597,16 @@ void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) { << "\n"; if (hasAltNames) { - for (unsigned i = 0, e = AltNameIndices.size(); i < e; ++i) - emitRegisterNameString(O, AltNameIndices[i]->getName(), Registers); + for (const Record *R : AltNameIndices) + emitRegisterNameString(O, R->getName(), Registers); } else emitRegisterNameString(O, "", Registers); if (hasAltNames) { O << " switch(AltIdx) {\n" << " default: llvm_unreachable(\"Invalid register alt name index!\");\n"; - for (unsigned i = 0, e = AltNameIndices.size(); i < e; ++i) { - std::string AltName(AltNameIndices[i]->getName()); + for (const Record *R : AltNameIndices) { + std::string AltName(R->getName()); std::string Prefix = !Namespace.empty() ? Namespace + "::" : ""; O << " case " << Prefix << AltName << ":\n" << " assert(*(AsmStrs" << AltName << "+RegAsmOffset" @@ -799,9 +793,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { typedef std::set<std::pair<CodeGenInstAlias, int>, AliasPriorityComparator> AliasWithPriority; std::map<std::string, AliasWithPriority> AliasMap; - for (std::vector<Record*>::iterator - I = AllInstAliases.begin(), E = AllInstAliases.end(); I != E; ++I) { - const Record *R = *I; + for (Record *R : AllInstAliases) { int Priority = R->getValueAsInt("EmitPriority"); if (Priority < 1) continue; // Aliases with priority 0 are never emitted. 
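The std::remove_if/erase pair adopted above is the standard idiom for filtering a vector in place, avoiding the index bookkeeping of the deleted manual loop. A minimal demonstration:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> V = {1, 0, 2, 0, 3};
  // remove_if compacts the kept elements to the front and returns the new
  // logical end; erase then drops the leftover tail in one call.
  V.erase(std::remove_if(V.begin(), V.end(), [](int X) { return X == 0; }),
          V.end());
  assert((V == std::vector<int>{1, 2, 3}));
  return 0;
}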
@@ -809,7 +801,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { const DagInit *DI = R->getValueAsDag("ResultInst"); const DefInit *Op = cast<DefInit>(DI->getOperator()); AliasMap[getQualifiedName(Op->getDef())].insert( - std::make_pair(CodeGenInstAlias(*I, Variant, Target), Priority)); + std::make_pair(CodeGenInstAlias(R, Variant, Target), Priority)); } // A map of which conditions need to be met for each instruction operand @@ -977,9 +969,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { CasesO.indent(2) << "case " << Entry.first << ":\n"; - for (std::vector<IAPrinter*>::iterator - II = UniqueIAPs.begin(), IE = UniqueIAPs.end(); II != IE; ++II) { - IAPrinter *IAP = *II; + for (IAPrinter *IAP : UniqueIAPs) { CasesO.indent(4); IAP->print(CasesO); CasesO << '\n'; @@ -1098,10 +1088,11 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { AsmWriterEmitter::AsmWriterEmitter(RecordKeeper &R) : Records(R), Target(R) { Record *AsmWriter = Target.getAsmWriter(); + unsigned Variant = AsmWriter->getValueAsInt("Variant"); + unsigned PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget"); for (const CodeGenInstruction *I : Target.instructions()) if (!I->AsmString.empty() && I->TheDef->getName() != "PHI") - Instructions.emplace_back(*I, AsmWriter->getValueAsInt("Variant"), - AsmWriter->getValueAsInt("PassSubtarget")); + Instructions.emplace_back(*I, Variant, PassSubtarget); // Get the instruction numbering. NumberedInstructions = &Target.getInstructionsByEnumValue(); @@ -1109,8 +1100,8 @@ AsmWriterEmitter::AsmWriterEmitter(RecordKeeper &R) : Records(R), Target(R) { // Compute the CodeGenInstruction -> AsmWriterInst mapping. Note that not // all machine instructions are necessarily being printed, so there may be // target instructions not in this map. - for (unsigned i = 0, e = Instructions.size(); i != e; ++i) - CGIAWIMap.insert(std::make_pair(Instructions[i].CGI, &Instructions[i])); + for (AsmWriterInst &AWI : Instructions) + CGIAWIMap.insert(std::make_pair(AWI.CGI, &AWI)); } void AsmWriterEmitter::run(raw_ostream &O) { diff --git a/contrib/llvm/utils/TableGen/AsmWriterInst.cpp b/contrib/llvm/utils/TableGen/AsmWriterInst.cpp index 9541887..5b09765 100644 --- a/contrib/llvm/utils/TableGen/AsmWriterInst.cpp +++ b/contrib/llvm/utils/TableGen/AsmWriterInst.cpp @@ -29,8 +29,8 @@ static bool isIdentChar(char C) { std::string AsmWriterOperand::getCode() const { if (OperandType == isLiteralTextOperand) { if (Str.size() == 1) - return "O << '" + Str + "'; "; - return "O << \"" + Str + "\"; "; + return "O << '" + Str + "';"; + return "O << \"" + Str + "\";"; } if (OperandType == isLiteralStatementOperand) @@ -44,7 +44,7 @@ std::string AsmWriterOperand::getCode() const { Result += ", O"; if (!MiModifier.empty()) Result += ", \"" + MiModifier + '"'; - return Result + "); "; + return Result + ");"; } /// ParseAsmString - Parse the specified Instruction's AsmString into this