diff options
-rw-r--r--lib/CodeGen/AsmPrinter/WinException.cpp (renamed from lib/CodeGen/AsmPrinter/Win64Exception.cpp)331
-rw-r--r--lib/CodeGen/AsmPrinter/WinException.h (renamed from lib/CodeGen/AsmPrinter/Win64Exception.h)26
-rw-r--r--test/Assembler/dicompileunit.ll (renamed from test/Assembler/mdcompileunit.ll)0
-rw-r--r--test/Assembler/diexpression.ll (renamed from test/Assembler/mdexpression.ll)0
-rw-r--r--test/Assembler/difile-escaped-chars.ll (renamed from test/Assembler/mdfile-escaped-chars.ll)0
-rw-r--r--test/Assembler/diglobalvariable.ll (renamed from test/Assembler/mdglobalvariable.ll)0
-rw-r--r--test/Assembler/diimportedentity.ll (renamed from test/Assembler/mdimportedentity.ll)0
-rw-r--r--test/Assembler/dilexicalblock.ll (renamed from test/Assembler/mdlexicalblock.ll)0
-rw-r--r--test/Assembler/dilocalvariable.ll (renamed from test/Assembler/mdlocalvariable.ll)0
-rw-r--r--test/Assembler/dilocation.ll (renamed from test/Assembler/mdlocation.ll)0
-rw-r--r--test/Assembler/dinamespace.ll (renamed from test/Assembler/mdnamespace.ll)0
-rw-r--r--test/Assembler/diobjcproperty.ll (renamed from test/Assembler/mdobjcproperty.ll)0
-rw-r--r--test/Assembler/disubprogram.ll (renamed from test/Assembler/mdsubprogram.ll)0
-rw-r--r--test/Assembler/disubrange-empty-array.ll (renamed from test/Assembler/mdsubrange-empty-array.ll)0
-rw-r--r--test/Assembler/disubroutinetype.ll (renamed from test/Assembler/mdsubroutinetype.ll)0
-rw-r--r--test/Assembler/ditemplateparameter.ll (renamed from test/Assembler/mdtemplateparameter.ll)0
-rw-r--r--test/Assembler/ditype-large-values.ll (renamed from test/Assembler/mdtype-large-values.ll)0
-rw-r--r--test/Assembler/invalid-dicompileunit-language-bad.ll (renamed from test/Assembler/invalid-mdcompileunit-language-bad.ll)0
-rw-r--r--test/Assembler/invalid-dicompileunit-language-overflow.ll (renamed from test/Assembler/invalid-mdcompileunit-language-overflow.ll)0
-rw-r--r--test/Assembler/invalid-dicompileunit-missing-language.ll (renamed from test/Assembler/invalid-mdcompileunit-missing-language.ll)0
-rw-r--r--test/Assembler/invalid-dicompileunit-null-file.ll (renamed from test/Assembler/invalid-mdcompileunit-null-file.ll)0
-rw-r--r--test/Assembler/invalid-dicompositetype-missing-tag.ll (renamed from test/Assembler/invalid-mdcompositetype-missing-tag.ll)0
-rw-r--r--test/Assembler/invalid-diderivedtype-missing-basetype.ll (renamed from test/Assembler/invalid-mdderivedtype-missing-basetype.ll)0
-rw-r--r--test/Assembler/invalid-diderivedtype-missing-tag.ll (renamed from test/Assembler/invalid-mdderivedtype-missing-tag.ll)0
-rw-r--r--test/Assembler/invalid-dienumerator-missing-name.ll (renamed from test/Assembler/invalid-mdenumerator-missing-name.ll)0
-rw-r--r--test/Assembler/invalid-dienumerator-missing-value.ll (renamed from test/Assembler/invalid-mdenumerator-missing-value.ll)0
-rw-r--r--test/Assembler/invalid-diexpression-large.ll (renamed from test/Assembler/invalid-mdexpression-large.ll)0
-rw-r--r--test/Assembler/invalid-diexpression-verify.ll (renamed from test/Assembler/invalid-mdexpression-verify.ll)0
-rw-r--r--test/Assembler/invalid-difile-missing-directory.ll (renamed from test/Assembler/invalid-mdfile-missing-directory.ll)0
-rw-r--r--test/Assembler/invalid-difile-missing-filename.ll (renamed from test/Assembler/invalid-mdfile-missing-filename.ll)0
-rw-r--r--test/Assembler/invalid-diglobalvariable-empty-name.ll (renamed from test/Assembler/invalid-mdglobalvariable-empty-name.ll)0
-rw-r--r--test/Assembler/invalid-diglobalvariable-missing-name.ll (renamed from test/Assembler/invalid-mdglobalvariable-missing-name.ll)0
-rw-r--r--test/Assembler/invalid-diimportedentity-missing-scope.ll (renamed from test/Assembler/invalid-mdimportedentity-missing-scope.ll)0
-rw-r--r--test/Assembler/invalid-diimportedentity-missing-tag.ll (renamed from test/Assembler/invalid-mdimportedentity-missing-tag.ll)0
-rw-r--r--test/Assembler/invalid-dilexicalblock-missing-scope.ll (renamed from test/Assembler/invalid-mdlexicalblock-missing-scope.ll)0
-rw-r--r--test/Assembler/invalid-dilexicalblock-null-scope.ll (renamed from test/Assembler/invalid-mdlexicalblock-null-scope.ll)0
-rw-r--r--test/Assembler/invalid-dilexicalblockfile-missing-discriminator.ll (renamed from test/Assembler/invalid-mdlexicalblockfile-missing-discriminator.ll)0
-rw-r--r--test/Assembler/invalid-dilexicalblockfile-missing-scope.ll (renamed from test/Assembler/invalid-mdlexicalblockfile-missing-scope.ll)0
-rw-r--r--test/Assembler/invalid-dilexicalblockfile-null-scope.ll (renamed from test/Assembler/invalid-mdlexicalblockfile-null-scope.ll)0
-rw-r--r--test/Assembler/invalid-dilocalvariable-missing-scope.ll (renamed from test/Assembler/invalid-mdlocalvariable-missing-scope.ll)0
-rw-r--r--test/Assembler/invalid-dilocalvariable-missing-tag.ll (renamed from test/Assembler/invalid-mdlocalvariable-missing-tag.ll)0
-rw-r--r--test/Assembler/invalid-dilocalvariable-null-scope.ll (renamed from test/Assembler/invalid-mdlocalvariable-null-scope.ll)0
-rw-r--r--test/Assembler/invalid-dilocation-field-bad.ll (renamed from test/Assembler/invalid-mdlocation-field-bad.ll)0
-rw-r--r--test/Assembler/invalid-dilocation-field-twice.ll (renamed from test/Assembler/invalid-mdlocation-field-twice.ll)0
-rw-r--r--test/Assembler/invalid-dilocation-missing-scope-2.ll (renamed from test/Assembler/invalid-mdlocation-missing-scope-2.ll)0
-rw-r--r--test/Assembler/invalid-dilocation-missing-scope.ll (renamed from test/Assembler/invalid-mdlocation-missing-scope.ll)0
-rw-r--r--test/Assembler/invalid-dilocation-null-scope.ll (renamed from test/Assembler/invalid-mdlocation-null-scope.ll)0
-rw-r--r--test/Assembler/invalid-dilocation-overflow-column.ll (renamed from test/Assembler/invalid-mdlocation-overflow-column.ll)0
-rw-r--r--test/Assembler/invalid-dilocation-overflow-line.ll (renamed from test/Assembler/invalid-mdlocation-overflow-line.ll)0
-rw-r--r--test/Assembler/invalid-dinamespace-missing-namespace.ll (renamed from test/Assembler/invalid-mdnamespace-missing-namespace.ll)0
-rw-r--r--test/Assembler/invalid-disubrange-count-large.ll (renamed from test/Assembler/invalid-mdsubrange-count-large.ll)0
-rw-r--r--test/Assembler/invalid-disubrange-count-missing.ll (renamed from test/Assembler/invalid-mdsubrange-count-missing.ll)0
-rw-r--r--test/Assembler/invalid-disubrange-count-negative.ll (renamed from test/Assembler/invalid-mdsubrange-count-negative.ll)0
-rw-r--r--test/Assembler/invalid-disubrange-lowerBound-max.ll (renamed from test/Assembler/invalid-mdsubrange-lowerBound-max.ll)0
-rw-r--r--test/Assembler/invalid-disubrange-lowerBound-min.ll (renamed from test/Assembler/invalid-mdsubrange-lowerBound-min.ll)0
-rw-r--r--test/Assembler/invalid-disubroutinetype-missing-types.ll (renamed from test/Assembler/invalid-mdsubroutinetype-missing-types.ll)0
-rw-r--r--test/Assembler/invalid-ditemplatetypeparameter-missing-type.ll (renamed from test/Assembler/invalid-mdtemplatetypeparameter-missing-type.ll)0
-rw-r--r--test/Assembler/invalid-ditemplatevalueparameter-missing-value.ll (renamed from test/Assembler/invalid-mdtemplatevalueparameter-missing-value.ll)0
-rw-r--r--test/Bitcode/Inputs/invalid-alias-type-mismatch.bcbin0 -> 452 bytes
-rw-r--r--test/Bitcode/Inputs/invalid-metadata-not-followed-named-node.bcbin0 -> 878 bytes
-rw-r--r--test/Bitcode/Inputs/invalid-vector-length.bcbin0 -> 488 bytes
-rwxr-xr-xtest/DebugInfo/Inputs/dwarfdump.elf-mips64-64-bit-dwarfbin0 -> 15638 bytes
-rw-r--r--test/DebugInfo/Inputs/invalid.elfbin0 -> 64 bytes
-rw-r--r--test/DebugInfo/Inputs/test-multiple-macho.obin0 -> 2452 bytes
-rw-r--r--test/DebugInfo/Inputs/test-simple-macho.obin0 -> 1944 bytes
-rw-r--r--test/Object/Inputs/macho-invalid-headerbin0 -> 24 bytes
-rw-r--r--test/Object/Inputs/macho64-invalid-incomplete-segment-load-commandbin0 -> 64 bytes
-rw-r--r--test/Object/Inputs/no-start-symbol.elf-x86_64bin0 -> 544 bytes
1006 files changed, 41168 insertions, 12573 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 776f3f6..026fe47 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,15 +32,23 @@ project(LLVM)
"Define the maximum number of concurrent compilation jobs.")
- set(CMAKE_JOB_POOL_COMPILE compile_job_pool)
+ message(WARNING "Job pooling is only available with Ninja generators and CMake 3.0 and later.")
+ else()
+ set(CMAKE_JOB_POOL_COMPILE compile_job_pool)
+ endif()
"Define the maximum number of concurrent link jobs.")
- set(CMAKE_JOB_POOL_LINK link_job_pool)
+ message(WARNING "Job pooling is only available with Ninja generators and CMake 3.0 and later.")
+ else()
+ set(CMAKE_JOB_POOL_LINK link_job_pool)
+ endif()
# Add path for custom modules
@@ -169,6 +177,7 @@ set(LLVM_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include)
diff --git a/autoconf/ b/autoconf/
index 586f3f2..390d22e 100644
--- a/autoconf/
+++ b/autoconf/
@@ -1337,6 +1337,13 @@ AC_ARG_WITH(default-sysroot,
[Default <path> to all compiler invocations for --sysroot=<path>.])
+ AS_HELP_STRING([--with-clang-default-openmp-runtime],
+ [The default OpenMP runtime for Clang.]),,
+ withval="libgomp")
+ [Default OpenMP runtime used by -fopenmp.])
dnl Allow linking of LLVM with GPLv3 binutils code.
diff --git a/cmake/modules/AddOCaml.cmake b/cmake/modules/AddOCaml.cmake
index c58ac9c..8b33332 100644
--- a/cmake/modules/AddOCaml.cmake
+++ b/cmake/modules/AddOCaml.cmake
@@ -149,7 +149,7 @@ function(add_ocaml_library name)
"-I" "${LLVM_LIBRARY_DIR}/ocaml/"
"-dump" "${bin}/${name}.odoc"
${ocaml_pkgs} ${ocaml_inputs}
- DEPENDS ${ocaml_inputs}
+ DEPENDS ${ocaml_inputs} ${ocaml_outputs}
COMMENT "Building OCaml documentation for ${name}"
diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake
index 97e272b..85d720e 100644
--- a/cmake/modules/TableGen.cmake
+++ b/cmake/modules/TableGen.cmake
@@ -94,7 +94,11 @@ macro(add_tablegen target project)
if( ${${project}_TABLEGEN} STREQUAL "${target}" )
- set(${project}_TABLEGEN_EXE "${LLVM_NATIVE_BUILD}/bin/${target}")
+ set(${project}_TABLEGEN_EXE "${LLVM_NATIVE_BUILD}/bin/${target}")
+ else()
+ set(${project}_TABLEGEN_EXE "${LLVM_NATIVE_BUILD}/Release/bin/${target}")
+ endif()
set(${project}_TABLEGEN_EXE ${${project}_TABLEGEN_EXE} PARENT_SCOPE)
add_custom_command(OUTPUT ${${project}_TABLEGEN_EXE}
diff --git a/configure b/configure
index 37f3d7d..254f7e0 100755
--- a/configure
+++ b/configure
@@ -1495,6 +1495,8 @@ Optional Packages:
search for headers
--with-gcc-toolchain Directory where gcc is installed.
--with-default-sysroot Add --sysroot=<path> to all compiler invocations.
+ --with-clang-default-openmp-runtime
+ The default OpenMP runtime for Clang.
--with-binutils-include Specify path to binutils/include/ containing
plugin-api.h file for gold plugin.
--with-bug-report-url Specify the URL where bug reports should be
@@ -5948,6 +5950,20 @@ _ACEOF
+# Check whether --with-clang-default-openmp-runtime was given.
+if test "${with_clang_default_openmp_runtime+set}" = set; then
+ withval=$with_clang_default_openmp_runtime;
+ withval="libgomp"
+cat >>confdefs.h <<_ACEOF
# Check whether --with-binutils-include was given.
if test "${with_binutils_include+set}" = set; then
diff --git a/docs/CommandGuide/llvm-profdata.rst b/docs/CommandGuide/llvm-profdata.rst
index 45f7073..7053b7f 100644
--- a/docs/CommandGuide/llvm-profdata.rst
+++ b/docs/CommandGuide/llvm-profdata.rst
@@ -49,6 +49,28 @@ OPTIONS
Specify the output file name. *Output* cannot be ``-`` as the resulting
indexed profile data can't be written to standard output.
+.. option:: -instr (default)
+ Specify that the input profile is an instrumentation-based profile.
+.. option:: -sample
+ Specify that the input profile is a sample-based profile. When using
+ sample-based profiles, the generated file can be emitted in one of three
+ formats:
+ .. option:: -binary (default)
+ Emit the profile using a binary encoding.
+ .. option:: -text
+ Emit the profile in text mode.
+ .. option:: -gcc
+ Emit the profile using GCC's gcov format (Not yet supported).
.. program:: llvm-profdata show
.. _profdata-show:
@@ -95,6 +117,14 @@ OPTIONS
Specify the output file name. If *output* is ``-`` or it isn't specified,
then the output is sent to standard output.
+.. option:: -instr (default)
+ Specify that the input profile is an instrumentation-based profile.
+.. option:: -sample
+ Specify that the input profile is a sample-based profile.
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 397d5fe..0996820 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -834,6 +834,11 @@ Named metadata is a collection of metadata. :ref:`Metadata
nodes <metadata>` (but not metadata strings) are the only valid
operands for a named metadata.
+#. Named metadata are represented as a string of characters with the
+ metadata prefix. The rules for metadata names are the same as for
+ identifiers, but quoted names are not allowed. ``"\xx"`` type escapes
+ are still valid, which allows any character to be part of a name.
; Some unnamed metadata nodes, which are referenced by the named metadata.
diff --git a/docs/Lexicon.rst b/docs/Lexicon.rst
index 112eb7d..912dee2 100644
--- a/docs/Lexicon.rst
+++ b/docs/Lexicon.rst
@@ -131,6 +131,10 @@ L
Loop-Closed Static Single Assignment Form
+ "Looks Good To Me". In a review thread, this indicates that the
+ reviewer thinks that the patch is okay to commit.
Loop Invariant Code Motion
diff --git a/docs/NVPTXUsage.rst b/docs/NVPTXUsage.rst
index e1c401d..fc697ca 100644
--- a/docs/NVPTXUsage.rst
+++ b/docs/NVPTXUsage.rst
@@ -168,10 +168,10 @@ These are overloaded intrinsics. You can use these on any pointer types.
.. code-block:: llvm
- declare i8* addrspace(1)*)
- declare i8* addrspace(3)*)
- declare i8* addrspace(4)*)
- declare i8* addrspace(5)*)
+ declare i8 addrspace(1)**)
+ declare i8 addrspace(3)**)
+ declare i8 addrspace(4)**)
+ declare i8 addrspace(5)**)
diff --git a/docs/TableGen/LangIntro.rst b/docs/TableGen/LangIntro.rst
index 85c74a5..4d4551e 100644
--- a/docs/TableGen/LangIntro.rst
+++ b/docs/TableGen/LangIntro.rst
@@ -190,7 +190,7 @@ supported include:
for 'a' in 'c.' This operation is analogous to $(subst) in GNU make.
``!foreach(a, b, c)``
- For each member 'b' of dag or list 'a' apply operator 'c.' 'b' is a dummy
+ For each member of dag or list 'b' apply operator 'c.' 'a' is a dummy
variable that should be declared as a member variable of an instantiated
class. This operation is analogous to $(foreach) in GNU make.
diff --git a/docs/YamlIO.rst b/docs/YamlIO.rst
index aa4bae3..f0baeb4 100644
--- a/docs/YamlIO.rst
+++ b/docs/YamlIO.rst
@@ -798,6 +798,8 @@ add "static const bool flow = true;". For instance:
static const bool flow = true;
+Flow mappings are subject to line wrapping according to the Output object
@@ -845,6 +847,8 @@ With the above, if you used MyList as the data type in your native data
structures, then when converted to YAML, a flow sequence of integers
will be used (e.g. [ 10, -3, 4 ]).
+Flow sequences are subject to line wrapping according to the Output object
Utility Macros
@@ -908,14 +912,14 @@ Output
The llvm::yaml::Output class is used to generate a YAML document from your
in-memory data structures, using traits defined on your data types.
-To instantiate an Output object you need an llvm::raw_ostream, and optionally
-a context pointer:
+To instantiate an Output object you need an llvm::raw_ostream, an optional
+context pointer and an optional wrapping column:
.. code-block:: c++
class Output : public IO {
- Output(llvm::raw_ostream &, void *context=NULL);
+ Output(llvm::raw_ostream &, void *context = NULL, int WrapColumn = 70);
Once you have an Output object, you can use the C++ stream operator on it
to write your native data as YAML. One thing to recall is that a YAML file
@@ -924,6 +928,10 @@ streaming as YAML is a mapping, scalar, or sequence, then Output assumes you
are generating one document and wraps the mapping output
with "``---``" and trailing "``...``".
+The WrapColumn parameter will cause the flow mappings and sequences to
+line-wrap when they go over the supplied column. Pass 0 to completely
+suppress the wrapping.
.. code-block:: c++
using llvm::yaml::Output;
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index effbd15..73bff0b 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -998,6 +998,13 @@ unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy);
void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest);
+ * Get the type of the element at a given index in the structure.
+ *
+ * @see llvm::StructType::getTypeAtIndex()
+ */
+LLVMTypeRef LLVMStructGetTypeAtIndex(LLVMTypeRef StructTy, unsigned i);
* Determine whether a structure is packed.
* @see llvm::StructType::isPacked()
diff --git a/include/llvm-c/Support.h b/include/llvm-c/Support.h
index a9216d0..eca3b7a 100644
--- a/include/llvm-c/Support.h
+++ b/include/llvm-c/Support.h
@@ -58,6 +58,24 @@ LLVMBool LLVMLoadLibraryPermanently(const char* Filename);
void LLVMParseCommandLineOptions(int argc, const char *const *argv,
const char *Overview);
+ * This function will search through all previously loaded dynamic
+ * libraries for the symbol \p symbolName. If it is found, the address of
+ * that symbol is returned. If not, null is returned.
+ *
+ * @see sys::DynamicLibrary::SearchForAddressOfSymbol()
+ */
+void *LLVMSearchForAddressOfSymbol(const char *symbolName);
+ * This function permanently adds the symbol \p symbolName with the
+ * value \p symbolValue. These symbols are searched before any
+ * libraries.
+ *
+ * @see sys::DynamicLibrary::AddSymbol()
+ */
+void LLVMAddSymbol(const char *symbolName, void *symbolValue);
#ifdef __cplusplus
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index 36d8159..e5d143d 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -351,8 +351,7 @@ public:
/// This checks to see if the value of this APInt is the maximum signed
/// value for the APInt's bit width.
bool isMaxSignedValue() const {
- return BitWidth == 1 ? VAL == 0
- : !isNegative() && countPopulation() == BitWidth - 1;
+ return !isNegative() && countPopulation() == BitWidth - 1;
/// \brief Determine if this is the smallest unsigned value.
@@ -366,7 +365,7 @@ public:
/// This checks to see if the value of this APInt is the minimum signed
/// value for the APInt's bit width.
bool isMinSignedValue() const {
- return BitWidth == 1 ? VAL == 1 : isNegative() && isPowerOf2();
+ return isNegative() && isPowerOf2();
/// \brief Check if this APInt has an N-bits unsigned integer value.
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index 2416ce3..1362fe3 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -50,7 +50,8 @@ public:
armeb, // ARM (big endian): armeb
aarch64, // AArch64 (little endian): aarch64
aarch64_be, // AArch64 (big endian): aarch64_be
- bpf, // eBPF or extended BPF or 64-bit BPF (little endian)
+ bpfel, // eBPF or extended BPF or 64-bit BPF (little endian)
+ bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian)
hexagon, // Hexagon: hexagon
mips, // MIPS: mips, mipsallegrex
mipsel, // MIPSEL: mipsel, mipsallegrexel
@@ -255,6 +256,15 @@ public:
/// getEnvironment - Get the parsed environment type of this triple.
EnvironmentType getEnvironment() const { return Environment; }
+ /// \brief Parse the version number from the OS name component of the
+ /// triple, if present.
+ ///
+ /// For example, "fooos1.2.3" would return (1, 2, 3).
+ ///
+ /// If an entry is not defined, it will be returned as 0.
+ void getEnvironmentVersion(unsigned &Major, unsigned &Minor,
+ unsigned &Micro) const;
/// getFormat - Get the object format for this triple.
ObjectFormatType getObjectFormat() const { return ObjectFormat; }
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index ac9d21c..de18e58 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -40,6 +40,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/Analysis/MemoryLocation.h"
namespace llvm {
@@ -82,7 +83,7 @@ public:
/// UnknownSize - This is a special value which can be used with the
/// size arguments in alias queries to indicate that the caller does not
/// know the sizes of the potential memory references.
- static uint64_t const UnknownSize = ~UINT64_C(0);
+ static uint64_t const UnknownSize = MemoryLocation::UnknownSize;
/// getTargetLibraryInfo - Return a pointer to the current TargetLibraryInfo
/// object, or null if no TargetLibraryInfo object is available.
@@ -98,70 +99,9 @@ public:
/// Alias Queries...
- /// Location - A description of a memory location.
- struct Location {
- /// Ptr - The address of the start of the location.
- const Value *Ptr;
- /// Size - The maximum size of the location, in address-units, or
- /// UnknownSize if the size is not known. Note that an unknown size does
- /// not mean the pointer aliases the entire virtual address space, because
- /// there are restrictions on stepping out of one object and into another.
- /// See
- uint64_t Size;
- /// AATags - The metadata nodes which describes the aliasing of the
- /// location (each member is null if that kind of information is
- /// unavailable)..
- AAMDNodes AATags;
- explicit Location(const Value *P = nullptr, uint64_t S = UnknownSize,
- const AAMDNodes &N = AAMDNodes())
- : Ptr(P), Size(S), AATags(N) {}
- Location getWithNewPtr(const Value *NewPtr) const {
- Location Copy(*this);
- Copy.Ptr = NewPtr;
- return Copy;
- }
- Location getWithNewSize(uint64_t NewSize) const {
- Location Copy(*this);
- Copy.Size = NewSize;
- return Copy;
- }
- Location getWithoutAATags() const {
- Location Copy(*this);
- Copy.AATags = AAMDNodes();
- return Copy;
- }
- bool operator==(const AliasAnalysis::Location &Other) const {
- return Ptr == Other.Ptr && Size == Other.Size && AATags == Other.AATags;
- }
- };
- /// getLocation - Fill in Loc with information about the memory reference by
- /// the given instruction.
- Location getLocation(const LoadInst *LI);
- Location getLocation(const StoreInst *SI);
- Location getLocation(const VAArgInst *VI);
- Location getLocation(const AtomicCmpXchgInst *CXI);
- Location getLocation(const AtomicRMWInst *RMWI);
- static Location getLocationForSource(const MemTransferInst *MTI);
- static Location getLocationForDest(const MemIntrinsic *MI);
- Location getLocation(const Instruction *Inst) {
- if (auto *I = dyn_cast<LoadInst>(Inst))
- return getLocation(I);
- else if (auto *I = dyn_cast<StoreInst>(Inst))
- return getLocation(I);
- else if (auto *I = dyn_cast<VAArgInst>(Inst))
- return getLocation(I);
- else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst))
- return getLocation(I);
- else if (auto *I = dyn_cast<AtomicRMWInst>(Inst))
- return getLocation(I);
- llvm_unreachable("unsupported memory instruction");
- }
+ /// Legacy typedef for the AA location object. New code should use \c
+ /// MemoryLocation directly.
+ typedef MemoryLocation Location;
/// Alias analysis result - Either we know for sure that it does not alias, we
/// know for sure it must alias, or we don't know anything: The two pointers
@@ -601,28 +541,6 @@ public:
-// Specialize DenseMapInfo for Location.
-struct DenseMapInfo<AliasAnalysis::Location> {
- static inline AliasAnalysis::Location getEmptyKey() {
- return AliasAnalysis::Location(DenseMapInfo<const Value *>::getEmptyKey(),
- 0);
- }
- static inline AliasAnalysis::Location getTombstoneKey() {
- return AliasAnalysis::Location(
- DenseMapInfo<const Value *>::getTombstoneKey(), 0);
- }
- static unsigned getHashValue(const AliasAnalysis::Location &Val) {
- return DenseMapInfo<const Value *>::getHashValue(Val.Ptr) ^
- DenseMapInfo<uint64_t>::getHashValue(Val.Size) ^
- DenseMapInfo<AAMDNodes>::getHashValue(Val.AATags);
- }
- static bool isEqual(const AliasAnalysis::Location &LHS,
- const AliasAnalysis::Location &RHS) {
- return LHS == RHS;
- }
/// isNoAliasCall - Return true if this pointer is returned by a noalias
/// function.
bool isNoAliasCall(const Value *V);
diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 9acc863..85a299b 100644
--- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -191,8 +191,8 @@ public:
/// \brief Data about a loop.
- /// Contains the data necessary to represent represent a loop as a
- /// pseudo-node once it's packaged.
+ /// Contains the data necessary to represent a loop as a pseudo-node once it's
+ /// packaged.
struct LoopData {
typedef SmallVector<std::pair<BlockNode, BlockMass>, 4> ExitMap;
typedef SmallVector<BlockNode, 4> NodeList;
@@ -930,7 +930,7 @@ void BlockFrequencyInfoImpl<BT>::doFunction(const FunctionT *F,
- // Visit loops in post-order to find thelocal mass distribution, and then do
+ // Visit loops in post-order to find the local mass distribution, and then do
// the full function.
diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h
index 89eef68..9d86756 100644
--- a/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -47,6 +47,9 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnFunction(Function &F) override;
+ void releaseMemory() override;
void print(raw_ostream &OS, const Module *M = nullptr) const override;
/// \brief Get an edge's probability, relative to other out-edges of the Src.
diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h
index 14b8822..5b64d85 100644
--- a/include/llvm/Analysis/CallGraph.h
+++ b/include/llvm/Analysis/CallGraph.h
@@ -230,7 +230,7 @@ public:
void addCalledFunction(CallSite CS, CallGraphNode *M) {
assert(!CS.getInstruction() || !CS.getCalledFunction() ||
- CalledFunctions.push_back(std::make_pair(CS.getInstruction(), M));
+ CalledFunctions.emplace_back(CS.getInstruction(), M);
diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h
index 0b3b2ea..a08ce57 100644
--- a/include/llvm/Analysis/DependenceAnalysis.h
+++ b/include/llvm/Analysis/DependenceAnalysis.h
@@ -41,6 +41,7 @@
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
@@ -520,11 +521,11 @@ namespace llvm {
/// in LoopNest.
bool isLoopInvariant(const SCEV *Expression, const Loop *LoopNest) const;
- /// Makes sure both subscripts (i.e. Pair->Src and Pair->Dst) share the same
- /// integer type by sign-extending one of them when necessary.
+ /// Makes sure all subscript pairs share the same integer type by
+ /// sign-extending as necessary.
/// Sign-extending a subscript is safe because getelementptr assumes the
- /// array subscripts are signed.
- void unifySubscriptType(Subscript *Pair);
+ /// array subscripts are signed.
+ void unifySubscriptType(ArrayRef<Subscript *> Pairs);
/// removeMatchingExtensions - Examines a subscript pair.
/// If the source and destination are identically sign (or zero)
diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h
index c14e145..7b635a8 100644
--- a/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -345,6 +345,10 @@ public:
/// to needsChecking.
bool needsAnyChecking(const SmallVectorImpl<int> *PtrPartition) const;
+ /// \brief Returns the number of run-time checks required according to
+ /// needsChecking.
+ unsigned getNumberOfChecks(const SmallVectorImpl<int> *PtrPartition) const;
/// \brief Print the list of run-time memory checks necessary.
/// If \p PtrPartition is set, it contains the partition number for
@@ -385,7 +389,10 @@ public:
/// \brief Number of memchecks required to prove independence of otherwise
/// may-alias pointers.
- unsigned getNumRuntimePointerChecks() const { return NumComparisons; }
+ unsigned getNumRuntimePointerChecks(
+ const SmallVectorImpl<int> *PtrPartition = nullptr) const {
+ return PtrRtCheck.getNumberOfChecks(PtrPartition);
+ }
/// Return true if the block BB needs to be predicated in order for the loop
/// to be vectorized.
@@ -460,10 +467,6 @@ private:
/// loop-independent and loop-carried dependences between memory accesses.
MemoryDepChecker DepChecker;
- /// \brief Number of memchecks required to prove independence of otherwise
- /// may-alias pointers
- unsigned NumComparisons;
Loop *TheLoop;
ScalarEvolution *SE;
const DataLayout &DL;
@@ -501,6 +504,11 @@ const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE,
const ValueToValueMap &PtrToStride,
Value *Ptr, Value *OrigPtr = nullptr);
+/// \brief Check the stride of the pointer and ensure that it does not wrap in
+/// the address space.
+int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
+ const ValueToValueMap &StridesMap);
/// \brief This analysis provides dependence information for the memory accesses
/// of a loop.
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index be78c15..bbcde8d 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -47,13 +47,6 @@ namespace llvm {
template <typename IRUnitT> class AnalysisManager;
class PreservedAnalyses;
-template<typename T>
-inline void RemoveFromVector(std::vector<T*> &V, T *N) {
- typename std::vector<T*>::iterator I = std::find(V.begin(), V.end(), N);
- assert(I != V.end() && "N is not in this list!");
- V.erase(I);
class DominatorTree;
class LoopInfo;
class Loop;
@@ -324,7 +317,10 @@ public:
/// current loop, updating the Blocks as appropriate. This does not update
/// the mapping in the LoopInfo class.
void removeBlockFromLoop(BlockT *BB) {
- RemoveFromVector(Blocks, BB);
+ auto I = std::find(Blocks.begin(), Blocks.end(), BB);
+ assert(I != Blocks.end() && "N is not in this list!");
+ Blocks.erase(I);
@@ -493,7 +489,7 @@ private:
template<class BlockT, class LoopT>
class LoopInfoBase {
// BBMap - Mapping of basic blocks to the inner most loop they occur in
- DenseMap<BlockT *, LoopT *> BBMap;
+ DenseMap<const BlockT *, LoopT *> BBMap;
std::vector<LoopT *> TopLevelLoops;
friend class LoopBase<BlockT, LoopT>;
friend class LoopInfo;
@@ -543,9 +539,7 @@ public:
/// getLoopFor - Return the inner most loop that BB lives in. If a basic
/// block is in no loop (for example the entry node), null is returned.
- LoopT *getLoopFor(const BlockT *BB) const {
- return BBMap.lookup(const_cast<BlockT*>(BB));
- }
+ LoopT *getLoopFor(const BlockT *BB) const { return BBMap.lookup(BB); }
/// operator[] - same as getLoopFor...
@@ -562,7 +556,7 @@ public:
// isLoopHeader - True if the block is a loop header node
- bool isLoopHeader(BlockT *BB) const {
+ bool isLoopHeader(const BlockT *BB) const {
const LoopT *L = getLoopFor(BB);
return L && L->getHeader() == BB;
@@ -729,12 +723,6 @@ public:
/// \brief Provide a name for the analysis for debugging and logging.
static StringRef name() { return "LoopAnalysis"; }
- LoopAnalysis() {}
- LoopAnalysis(const LoopAnalysis &Arg) {}
- LoopAnalysis(LoopAnalysis &&Arg) {}
- LoopAnalysis &operator=(const LoopAnalysis &RHS) { return *this; }
- LoopAnalysis &operator=(LoopAnalysis &&RHS) { return *this; }
LoopInfo run(Function &F, AnalysisManager<Function> *AM);
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index 0490bb1..f5cc856 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -527,7 +527,7 @@ void LoopInfoBase<BlockT, LoopT>::verify() const {
// Verify that blocks are mapped to valid loops.
#ifndef NDEBUG
for (auto &Entry : BBMap) {
- BlockT *BB = Entry.first;
+ const BlockT *BB = Entry.first;
LoopT *L = Entry.second;
assert(Loops.count(L) && "orphaned loop");
assert(L->contains(BB) && "orphaned block");
diff --git a/include/llvm/Analysis/MemoryLocation.h b/include/llvm/Analysis/MemoryLocation.h
new file mode 100644
index 0000000..94d938d
--- /dev/null
+++ b/include/llvm/Analysis/MemoryLocation.h
@@ -0,0 +1,137 @@
+//===- MemoryLocation.h - Memory location descriptions ----------*- C++ -*-===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file
+/// This file provides utility analysis objects describing memory locations.
+/// These are used both by the Alias Analysis infrastructure and more
+/// specialized memory analysis layers.
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Metadata.h"
+namespace llvm {
+class LoadInst;
+class StoreInst;
+class MemTransferInst;
+class MemIntrinsic;
+/// Representation for a specific memory location.
+/// This abstraction can be used to represent a specific location in memory.
+/// The goal of the location is to represent enough information to describe
+/// abstract aliasing, modification, and reference behaviors of whatever
+/// value(s) are stored in memory at the particular location.
+/// The primary user of this interface is LLVM's Alias Analysis, but other
+/// memory analyses such as MemoryDependence can use it as well.
+class MemoryLocation {
+ /// UnknownSize - This is a special value which can be used with the
+ /// size arguments in alias queries to indicate that the caller does not
+ /// know the sizes of the potential memory references.
+ enum : uint64_t { UnknownSize = ~UINT64_C(0) };
+ /// The address of the start of the location.
+ const Value *Ptr;
+ /// The maximum size of the location, in address-units, or
+ /// UnknownSize if the size is not known.
+ ///
+ /// Note that an unknown size does not mean the pointer aliases the entire
+ /// virtual address space, because there are restrictions on stepping out of
+ /// one object and into another. See
+ /// http://llvm.org/docs/LangRef.html#pointeraliasing
+ uint64_t Size;
+ /// The metadata nodes which describe the aliasing of the location (each
+ /// member is null if that kind of information is unavailable).
+ AAMDNodes AATags;
+ /// Return a location with information about the memory reference by the given
+ /// instruction.
+ static MemoryLocation get(const LoadInst *LI);
+ static MemoryLocation get(const StoreInst *SI);
+ static MemoryLocation get(const VAArgInst *VI);
+ static MemoryLocation get(const AtomicCmpXchgInst *CXI);
+ static MemoryLocation get(const AtomicRMWInst *RMWI);
+ static MemoryLocation get(const Instruction *Inst) {
+ if (auto *I = dyn_cast<LoadInst>(Inst))
+ return get(I);
+ else if (auto *I = dyn_cast<StoreInst>(Inst))
+ return get(I);
+ else if (auto *I = dyn_cast<VAArgInst>(Inst))
+ return get(I);
+ else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst))
+ return get(I);
+ else if (auto *I = dyn_cast<AtomicRMWInst>(Inst))
+ return get(I);
+ llvm_unreachable("unsupported memory instruction");
+ }
+ /// Return a location representing the source of a memory transfer.
+ static MemoryLocation getForSource(const MemTransferInst *MTI);
+ /// Return a location representing the destination of a memory set or
+ /// transfer.
+ static MemoryLocation getForDest(const MemIntrinsic *MI);
+ explicit MemoryLocation(const Value *Ptr = nullptr,
+ uint64_t Size = UnknownSize,
+ const AAMDNodes &AATags = AAMDNodes())
+ : Ptr(Ptr), Size(Size), AATags(AATags) {}
+ MemoryLocation getWithNewPtr(const Value *NewPtr) const {
+ MemoryLocation Copy(*this);
+ Copy.Ptr = NewPtr;
+ return Copy;
+ }
+ MemoryLocation getWithNewSize(uint64_t NewSize) const {
+ MemoryLocation Copy(*this);
+ Copy.Size = NewSize;
+ return Copy;
+ }
+ MemoryLocation getWithoutAATags() const {
+ MemoryLocation Copy(*this);
+ Copy.AATags = AAMDNodes();
+ return Copy;
+ }
+ bool operator==(const MemoryLocation &Other) const {
+ return Ptr == Other.Ptr && Size == Other.Size && AATags == Other.AATags;
+ }
+// Specialize DenseMapInfo for MemoryLocation.
+template <> struct DenseMapInfo<MemoryLocation> {
+ static inline MemoryLocation getEmptyKey() {
+ return MemoryLocation(DenseMapInfo<const Value *>::getEmptyKey(), 0);
+ }
+ static inline MemoryLocation getTombstoneKey() {
+ return MemoryLocation(DenseMapInfo<const Value *>::getTombstoneKey(), 0);
+ }
+ static unsigned getHashValue(const MemoryLocation &Val) {
+ return DenseMapInfo<const Value *>::getHashValue(Val.Ptr) ^
+ DenseMapInfo<uint64_t>::getHashValue(Val.Size) ^
+ DenseMapInfo<AAMDNodes>::getHashValue(Val.AATags);
+ }
+ static bool isEqual(const MemoryLocation &LHS, const MemoryLocation &RHS) {
+ return LHS == RHS;
+ }
diff --git a/include/llvm/Analysis/PHITransAddr.h b/include/llvm/Analysis/PHITransAddr.h
index 84bb9d8..cbdbb88 100644
--- a/include/llvm/Analysis/PHITransAddr.h
+++ b/include/llvm/Analysis/PHITransAddr.h
@@ -75,12 +75,12 @@ public:
bool IsPotentiallyPHITranslatable() const;
/// PHITranslateValue - PHI translate the current address up the CFG from
- /// CurBB to Pred, updating our state to reflect any needed changes. If the
- /// dominator tree DT is non-null, the translated value must dominate
+ /// CurBB to Pred, updating our state to reflect any needed changes. If
+ /// 'MustDominate' is true, the translated value must dominate
/// PredBB. This returns true on failure and sets Addr to null.
bool PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
- const DominatorTree *DT);
+ const DominatorTree *DT, bool MustDominate);
/// PHITranslateWithInsertion - PHI translate this value into the specified
/// predecessor block, inserting a computation of the value if it is
/// unavailable.
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 86bf154..3700c9e 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -221,19 +221,21 @@ public:
/// Parameters that control the generic loop unrolling transformation.
struct UnrollingPreferences {
- /// The cost threshold for the unrolled loop, compared to
- /// CodeMetrics.NumInsts aggregated over all basic blocks in the loop body.
- /// The unrolling factor is set such that the unrolled loop body does not
- /// exceed this cost. Set this to UINT_MAX to disable the loop body cost
+ /// The cost threshold for the unrolled loop. Should be relative to the
+ /// getUserCost values returned by this API, and the expectation is that
+ /// the unrolled loop's instructions when run through that interface should
+ /// not exceed this cost. However, this is only an estimate. Also, specific
+ /// loops may be unrolled even with a cost above this threshold if deemed
+ /// profitable. Set this to UINT_MAX to disable the loop body cost
/// restriction.
unsigned Threshold;
- /// If complete unrolling could help other optimizations (e.g. InstSimplify)
- /// to remove N% of instructions, then we can go beyond unroll threshold.
- /// This value set the minimal percent for allowing that.
- unsigned MinPercentOfOptimized;
- /// The absolute cost threshold. We won't go beyond this even if complete
- /// unrolling could result in optimizing out 90% of instructions.
- unsigned AbsoluteThreshold;
+ /// If complete unrolling will reduce the cost of the loop below its
+ /// expected dynamic cost while rolled by this percentage, apply a discount
+ /// (below) to its unrolled cost.
+ unsigned PercentDynamicCostSavedThreshold;
+ /// The discount applied to the unrolled cost when the *dynamic* cost
+ /// savings of unrolling exceed the \c PercentDynamicCostSavedThreshold.
+ unsigned DynamicCostSavingsDiscount;
/// The cost threshold for the unrolled loop when optimizing for size (set
/// to UINT_MAX to disable).
unsigned OptSizeThreshold;
@@ -303,7 +305,8 @@ public:
/// mode is legal for a load/store of any legal type.
/// TODO: Handle pre/postinc as well.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) const;
+ bool HasBaseReg, int64_t Scale,
+ unsigned AddrSpace = 0) const;
/// \brief Return true if the target works with masked instruction
/// AVX2 allows masks for consecutive load and store for i32 and i64 elements.
@@ -319,7 +322,8 @@ public:
/// If the AM is not supported, it returns a negative value.
/// TODO: Handle pre/postinc as well.
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) const;
+ bool HasBaseReg, int64_t Scale,
+ unsigned AddrSpace = 0) const;
/// \brief Return true if it's free to truncate a value of type Ty1 to type
/// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
@@ -444,6 +448,20 @@ public:
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) const;
+ /// \return The cost of the interleaved memory operation.
+ /// \p Opcode is the memory operation code
+ /// \p VecTy is the vector type of the interleaved access.
+ /// \p Factor is the interleave factor
+ /// \p Indices is the indices for interleaved load members (as interleaved
+ /// load allows gaps)
+ /// \p Alignment is the alignment of the memory operation
+ /// \p AddressSpace is the address space of the pointer.
+ unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
/// \brief Calculate the cost of performing a vector reduction.
/// This is the cost of reducing the vector value of type \p Ty to a scalar
@@ -539,12 +557,13 @@ public:
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) = 0;
+ int64_t Scale,
+ unsigned AddrSpace) = 0;
virtual bool isLegalMaskedStore(Type *DataType, int Consecutive) = 0;
virtual bool isLegalMaskedLoad(Type *DataType, int Consecutive) = 0;
virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) = 0;
+ int64_t Scale, unsigned AddrSpace) = 0;
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
virtual bool isProfitableToHoist(Instruction *I) = 0;
virtual bool isTypeLegal(Type *Ty) = 0;
@@ -582,6 +601,11 @@ public:
virtual unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) = 0;
+ virtual unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) = 0;
virtual unsigned getReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) = 0;
virtual unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
@@ -648,9 +672,10 @@ public:
return Impl.isLegalICmpImmediate(Imm);
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) override {
+ bool HasBaseReg, int64_t Scale,
+ unsigned AddrSpace) override {
return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
- Scale);
+ Scale, AddrSpace);
bool isLegalMaskedStore(Type *DataType, int Consecutive) override {
return Impl.isLegalMaskedStore(DataType, Consecutive);
@@ -659,8 +684,10 @@ public:
return Impl.isLegalMaskedLoad(DataType, Consecutive);
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) override {
- return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale);
+ bool HasBaseReg, int64_t Scale,
+ unsigned AddrSpace) override {
+ return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
+ Scale, AddrSpace);
bool isTruncateFree(Type *Ty1, Type *Ty2) override {
return Impl.isTruncateFree(Ty1, Ty2);
@@ -740,6 +767,14 @@ public:
unsigned AddressSpace) override {
return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) override {
+ return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+ }
unsigned getReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) override {
return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index 253319c..e6a8a76 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -207,7 +207,8 @@ public:
bool isLegalICmpImmediate(int64_t Imm) { return false; }
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) {
+ bool HasBaseReg, int64_t Scale,
+ unsigned AddrSpace) {
// Guess that only reg and reg+reg addressing is allowed. This heuristic is
// taken from the implementation of LSR.
return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
@@ -218,9 +219,10 @@ public:
bool isLegalMaskedLoad(Type *DataType, int Consecutive) { return false; }
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) {
+ bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
// Guess that all legal addressing mode are free.
- if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale))
+ if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
+ Scale, AddrSpace))
return 0;
return -1;
@@ -300,6 +302,14 @@ public:
return 1;
+ unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ return 1;
+ }
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys) {
return 1;
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
index 46e64d1..4c040a7 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -113,7 +113,7 @@ public:
return *const_cast<BlockInfo*>(BI);
// Otherwise, add a new record.
- BlockInfoRecords.push_back(BlockInfo());
+ BlockInfoRecords.emplace_back();
BlockInfoRecords.back().BlockID = BlockID;
return BlockInfoRecords.back();
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index 9e2c2fa..f7487a0 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -215,7 +215,7 @@ public:
// Push the outer block's abbrev set onto the stack, start out with an
// empty abbrev set.
- BlockScope.push_back(Block(OldCodeSize, BlockSizeWordIndex));
+ BlockScope.emplace_back(OldCodeSize, BlockSizeWordIndex);
// If there is a blockinfo for this BlockID, add all the predefined abbrevs
@@ -503,7 +503,7 @@ private:
return *BI;
// Otherwise, add a new record.
- BlockInfoRecords.push_back(BlockInfo());
+ BlockInfoRecords.emplace_back();
BlockInfoRecords.back().BlockID = BlockID;
return BlockInfoRecords.back();
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index d072655..3e464f4 100644
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -125,23 +125,24 @@ public:
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) {
+ bool HasBaseReg, int64_t Scale,
+ unsigned AddrSpace) {
TargetLoweringBase::AddrMode AM;
AM.BaseGV = BaseGV;
AM.BaseOffs = BaseOffset;
AM.HasBaseReg = HasBaseReg;
AM.Scale = Scale;
- return getTLI()->isLegalAddressingMode(AM, Ty);
+ return getTLI()->isLegalAddressingMode(AM, Ty, AddrSpace);
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) {
+ bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
TargetLoweringBase::AddrMode AM;
AM.BaseGV = BaseGV;
AM.BaseOffs = BaseOffset;
AM.HasBaseReg = HasBaseReg;
AM.Scale = Scale;
- return getTLI()->getScalingFactorCost(AM, Ty);
+ return getTLI()->getScalingFactorCost(AM, Ty, AddrSpace);
bool isTruncateFree(Type *Ty1, Type *Ty2) {
@@ -522,6 +523,73 @@ public:
return Cost;
+ unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ VectorType *VT = dyn_cast<VectorType>(VecTy);
+ assert(VT && "Expect a vector type for interleaved memory op");
+ unsigned NumElts = VT->getNumElements();
+ assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
+ unsigned NumSubElts = NumElts / Factor;
+ VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
+ // First, the cost of the load/store operation.
+ unsigned Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+ // Then add the cost of the interleave operation.
+ if (Opcode == Instruction::Load) {
+ // The interleave cost is similar to extracting the sub vectors' elements
+ // from the wide vector and inserting them into the sub vectors.
+ //
+ // E.g. An interleaved load of factor 2 (with one member of index 0):
+ // %vec = load <8 x i32>, <8 x i32>* %ptr
+ // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
+ // The cost is estimated as extract elements at 0, 2, 4, 6 from the
+ // <8 x i32> vector and insert them into a <4 x i32> vector.
+ assert(Indices.size() <= Factor &&
+ "Interleaved memory op has too many members");
+ for (unsigned Index : Indices) {
+ assert(Index < Factor && "Invalid index for interleaved memory op");
+ // Extract elements from loaded vector for each sub vector.
+ for (unsigned i = 0; i < NumSubElts; i++)
+ Cost += getVectorInstrCost(Instruction::ExtractElement, VT,
+ Index + i * Factor);
+ }
+ unsigned InsSubCost = 0;
+ for (unsigned i = 0; i < NumSubElts; i++)
+ InsSubCost += getVectorInstrCost(Instruction::InsertElement, SubVT, i);
+ Cost += Indices.size() * InsSubCost;
+ } else {
+ // The interleave cost is that of extracting all elements from the sub
+ // vectors and inserting them into the wide vector.
+ //
+ // E.g. An interleaved store of factor 2:
+ // %interleaved.vec = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
+ // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
+ // The cost is estimated as extract all elements from both <4 x i32>
+ // vectors and insert into the <8 x i32> vector.
+ unsigned ExtSubCost = 0;
+ for (unsigned i = 0; i < NumSubElts; i++)
+ ExtSubCost += getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
+ Cost += Factor * ExtSubCost;
+ for (unsigned i = 0; i < NumElts; i++)
+ Cost += getVectorInstrCost(Instruction::InsertElement, VT, i);
+ }
+ return Cost;
+ }
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys) {
unsigned ISD = 0;
diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h
index a1b9b4e..b824df3 100644
--- a/include/llvm/CodeGen/CommandFlags.h
+++ b/include/llvm/CodeGen/CommandFlags.h
@@ -24,6 +24,7 @@
#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRecip.h"
#include <string>
using namespace llvm;
@@ -152,6 +153,12 @@ FuseFPOps("fp-contract",
"Only fuse FP ops when the result won't be effected."),
+ cl::CommaSeparated,
+ cl::desc("Choose reciprocal operation types and parameters."),
+ cl::value_desc("all,none,default,divf,!vec-sqrtd,vec-divd:0,sqrt:9..."));
cl::desc("Don't place zero-initialized symbols into bss section"),
@@ -230,6 +237,7 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
TargetOptions Options;
Options.LessPreciseFPMADOption = EnableFPMAD;
Options.AllowFPOpFusion = FuseFPOps;
+ Options.Reciprocals = TargetRecip(ReciprocalOps);
Options.UnsafeFPMath = EnableUnsafeFPMath;
Options.NoInfsFPMath = EnableNoInfsFPMath;
Options.NoNaNsFPMath = EnableNoNaNsFPMath;
diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h
index 8e40ef7..464e0fa 100644
--- a/include/llvm/CodeGen/DIE.h
+++ b/include/llvm/CodeGen/DIE.h
@@ -105,153 +105,13 @@ public:
-/// DIE - A structured debug information entry. Has an abbreviation which
-/// describes its organization.
-class DIEValue;
-class DIE {
- /// Offset - Offset in debug info section.
- ///
- unsigned Offset;
- /// Size - Size of instance + children.
- ///
- unsigned Size;
- /// Abbrev - Buffer for constructing abbreviation.
- ///
- DIEAbbrev Abbrev;
- /// Children DIEs.
- ///
- // This can't be a vector<DIE> because pointer validity is requirent for the
- // Parent pointer and DIEEntry.
- // It can't be a list<DIE> because some clients need pointer validity before
- // the object has been added to any child list
- // (eg: DwarfUnit::constructVariableDIE). These aren't insurmountable, but may
- // be more convoluted than beneficial.
- std::vector<std::unique_ptr<DIE>> Children;
- DIE *Parent;
- /// Attribute values.
- ///
- SmallVector<DIEValue *, 12> Values;
- DIE()
- : Offset(0), Size(0), Abbrev((dwarf::Tag)0, dwarf::DW_CHILDREN_no),
- Parent(nullptr) {}
- explicit DIE(dwarf::Tag Tag)
- : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no),
- Parent(nullptr) {}
- // Accessors.
- DIEAbbrev &getAbbrev() { return Abbrev; }
- const DIEAbbrev &getAbbrev() const { return Abbrev; }
- unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
- dwarf::Tag getTag() const { return Abbrev.getTag(); }
- unsigned getOffset() const { return Offset; }
- unsigned getSize() const { return Size; }
- const std::vector<std::unique_ptr<DIE>> &getChildren() const {
- return Children;
- }
- const SmallVectorImpl<DIEValue *> &getValues() const { return Values; }
- DIE *getParent() const { return Parent; }
- /// Climb up the parent chain to get the compile or type unit DIE this DIE
- /// belongs to.
- const DIE *getUnit() const;
- /// Similar to getUnit, returns null when DIE is not added to an
- /// owner yet.
- const DIE *getUnitOrNull() const;
- void setOffset(unsigned O) { Offset = O; }
- void setSize(unsigned S) { Size = S; }
- /// addValue - Add a value and attributes to a DIE.
- ///
- void addValue(dwarf::Attribute Attribute, dwarf::Form Form, DIEValue *Value) {
- Abbrev.AddAttribute(Attribute, Form);
- Values.push_back(Value);
- }
- /// addChild - Add a child to the DIE.
- ///
- void addChild(std::unique_ptr<DIE> Child) {
- assert(!Child->getParent());
- Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
- Child->Parent = this;
- Children.push_back(std::move(Child));
- }
- /// findAttribute - Find a value in the DIE with the attribute given,
- /// returns NULL if no such attribute exists.
- DIEValue *findAttribute(dwarf::Attribute Attribute) const;
-#ifndef NDEBUG
- void print(raw_ostream &O, unsigned IndentCount = 0) const;
- void dump();
-/// DIEValue - A debug information entry value. Some of these roughly correlate
-/// to DWARF attribute classes.
-class DIEValue {
- enum Type {
- isInteger,
- isString,
- isExpr,
- isLabel,
- isDelta,
- isEntry,
- isTypeSignature,
- isBlock,
- isLoc,
- isLocList,
- };
- /// Ty - Type of data stored in the value.
- ///
- Type Ty;
- explicit DIEValue(Type T) : Ty(T) {}
- ~DIEValue() {}
- // Accessors
- Type getType() const { return Ty; }
- /// EmitValue - Emit value via the Dwarf writer.
- ///
- void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
- /// SizeOf - Return the size of a value in bytes.
- ///
- unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
-#ifndef NDEBUG
- void print(raw_ostream &O) const;
- void dump() const;
/// DIEInteger - An integer value DIE.
-class DIEInteger : public DIEValue {
- friend DIEValue;
+class DIEInteger {
uint64_t Integer;
- explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+ explicit DIEInteger(uint64_t I) : Integer(I) {}
/// BestForm - Choose the best form for integer.
@@ -278,120 +138,91 @@ public:
uint64_t getValue() const { return Integer; }
void setValue(uint64_t Val) { Integer = Val; }
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
- void EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const;
- unsigned SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const;
+ void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
#ifndef NDEBUG
- void printImpl(raw_ostream &O) const;
+ void print(raw_ostream &O) const;
/// DIEExpr - An expression DIE.
-class DIEExpr : public DIEValue {
- friend class DIEValue;
+class DIEExpr {
const MCExpr *Expr;
- explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {}
+ explicit DIEExpr(const MCExpr *E) : Expr(E) {}
/// getValue - Get MCExpr.
const MCExpr *getValue() const { return Expr; }
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *E) { return E->getType() == isExpr; }
- void EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const;
- unsigned SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const;
+ void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
#ifndef NDEBUG
- void printImpl(raw_ostream &O) const;
+ void print(raw_ostream &O) const;
/// DIELabel - A label DIE.
-class DIELabel : public DIEValue {
- friend class DIEValue;
+class DIELabel {
const MCSymbol *Label;
- explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {}
+ explicit DIELabel(const MCSymbol *L) : Label(L) {}
/// getValue - Get MCSymbol.
const MCSymbol *getValue() const { return Label; }
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
- void EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const;
- unsigned SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const;
+ void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
#ifndef NDEBUG
- void printImpl(raw_ostream &O) const;
+ void print(raw_ostream &O) const;
/// DIEDelta - A simple label difference DIE.
-class DIEDelta : public DIEValue {
- friend class DIEValue;
+class DIEDelta {
const MCSymbol *LabelHi;
const MCSymbol *LabelLo;
- DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo)
- : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
+ DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo) : LabelHi(Hi), LabelLo(Lo) {}
- void EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const;
- unsigned SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const;
+ void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
#ifndef NDEBUG
- void printImpl(raw_ostream &O) const;
+ void print(raw_ostream &O) const;
/// DIEString - A container for string values.
-class DIEString : public DIEValue {
- friend class DIEValue;
+class DIEString {
DwarfStringPoolEntryRef S;
- DIEString(DwarfStringPoolEntryRef S) : DIEValue(isString), S(S) {}
+ DIEString(DwarfStringPoolEntryRef S) : S(S) {}
/// getString - Grab the string out of the object.
StringRef getString() const { return S.getString(); }
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *D) { return D->getType() == isString; }
- void EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const;
- unsigned SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const;
+ void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
#ifndef NDEBUG
- void printImpl(raw_ostream &O) const;
+ void print(raw_ostream &O) const;
@@ -399,72 +230,350 @@ private:
/// DIEEntry - A pointer to another debug information entry. An instance of
/// this class can also be used as a proxy for a debug information entry not
/// yet defined (ie. types.)
-class DIEEntry : public DIEValue {
- friend class DIEValue;
+class DIE;
+class DIEEntry {
+ DIE *Entry;
- DIE &Entry;
+ DIEEntry() = delete;
- explicit DIEEntry(DIE &E) : DIEValue(isEntry), Entry(E) {
- }
+ explicit DIEEntry(DIE &E) : Entry(&E) {}
- DIE &getEntry() const { return Entry; }
+ DIE &getEntry() const { return *Entry; }
/// Returns size of a ref_addr entry.
static unsigned getRefAddrSize(const AsmPrinter *AP);
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
- void EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const;
- unsigned SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+ void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP)
: sizeof(int32_t);
#ifndef NDEBUG
- void printImpl(raw_ostream &O) const;
+ void print(raw_ostream &O) const;
/// \brief A signature reference to a type unit.
-class DIETypeSignature : public DIEValue {
- friend class DIEValue;
+class DIETypeSignature {
+ const DwarfTypeUnit *Unit;
- const DwarfTypeUnit &Unit;
+ DIETypeSignature() = delete;
- explicit DIETypeSignature(const DwarfTypeUnit &Unit)
- : DIEValue(isTypeSignature), Unit(Unit) {}
+ explicit DIETypeSignature(const DwarfTypeUnit &Unit) : Unit(&Unit) {}
- // \brief Implement isa/cast/dyncast.
- static bool classof(const DIEValue *E) {
- return E->getType() == isTypeSignature;
+ void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ assert(Form == dwarf::DW_FORM_ref_sig8);
+ return 8;
+#ifndef NDEBUG
+ void print(raw_ostream &O) const;
+/// DIELocList - Represents a pointer to a location list in the debug_loc
+/// section.
+class DIELocList {
+ // Index into the .debug_loc vector.
+ size_t Index;
+ DIELocList(size_t I) : Index(I) {}
+ /// getValue - Grab the current index out.
+ size_t getValue() const { return Index; }
+ void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
+#ifndef NDEBUG
+ void print(raw_ostream &O) const;
+/// DIEValue - A debug information entry value. Some of these roughly correlate
+/// to DWARF attribute classes.
+class DIEBlock;
+class DIELoc;
+class DIEValue {
+ enum Type {
+ isNone,
+#define HANDLE_DIEVALUE(T) is##T,
+#include "llvm/CodeGen/DIEValue.def"
+ };
- void EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const;
- unsigned SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
- assert(Form == dwarf::DW_FORM_ref_sig8);
- return 8;
+ /// Ty - Type of data stored in the value.
+ ///
+ Type Ty = isNone;
+ dwarf::Attribute Attribute = (dwarf::Attribute)0;
+ dwarf::Form Form = (dwarf::Form)0;
+ /// Storage for the value.
+ ///
+ /// All values that aren't standard layout (or are larger than 8 bytes)
+ /// should be stored by reference instead of by value.
+ typedef AlignedCharArrayUnion<DIEInteger, DIEString, DIEExpr, DIELabel,
+ DIEDelta *, DIEEntry, DIETypeSignature,
+ DIEBlock *, DIELoc *, DIELocList> ValTy;
+ static_assert(sizeof(ValTy) <= sizeof(uint64_t) ||
+ sizeof(ValTy) <= sizeof(void *),
+ "Expected all large types to be stored via pointer");
+ /// Underlying stored value.
+ ValTy Val;
+ template <class T> void construct(T V) {
+ static_assert(std::is_standard_layout<T>::value ||
+ std::is_pointer<T>::value,
+ "Expected standard layout or pointer");
+ new (reinterpret_cast<void *>(Val.buffer)) T(V);
+ }
+ template <class T> T *get() { return reinterpret_cast<T *>(Val.buffer); }
+ template <class T> const T *get() const {
+ return reinterpret_cast<const T *>(Val.buffer);
+ }
+ template <class T> void destruct() { get<T>()->~T(); }
+ /// Destroy the underlying value.
+ ///
+ /// This should get optimized down to a no-op. We could skip it if we could
+ /// add a static assert on \a std::is_trivially_copyable(), but we currently
+ /// support versions of GCC that don't understand that.
+ void destroyVal() {
+ switch (Ty) {
+ case isNone:
+ return;
+ case is##T: \
+ destruct<DIE##T>();
+ return;
+ case is##T: \
+ destruct<const DIE##T *>();
+ return;
+#include "llvm/CodeGen/DIEValue.def"
+ }
+ }
+ /// Copy the underlying value.
+ ///
+ /// This should get optimized down to a simple copy. We need to actually
+ /// construct the value, rather than calling memcpy, to satisfy strict
+ /// aliasing rules.
+ void copyVal(const DIEValue &X) {
+ switch (Ty) {
+ case isNone:
+ return;
+ case is##T: \
+ construct<DIE##T>(*X.get<DIE##T>()); \
+ return;
+ case is##T: \
+ construct<const DIE##T *>(*X.get<const DIE##T *>()); \
+ return;
+#include "llvm/CodeGen/DIEValue.def"
+ }
+ DIEValue() = default;
+ DIEValue(const DIEValue &X) : Ty(X.Ty), Attribute(X.Attribute), Form(X.Form) {
+ copyVal(X);
+ }
+ DIEValue &operator=(const DIEValue &X) {
+ destroyVal();
+ Ty = X.Ty;
+ Attribute = X.Attribute;
+ Form = X.Form;
+ copyVal(X);
+ return *this;
+ }
+ ~DIEValue() { destroyVal(); }
+ DIEValue(dwarf::Attribute Attribute, dwarf::Form Form, const DIE##T &V) \
+ : Ty(is##T), Attribute(Attribute), Form(Form) { \
+ construct<DIE##T>(V); \
+ }
+ DIEValue(dwarf::Attribute Attribute, dwarf::Form Form, const DIE##T *V) \
+ : Ty(is##T), Attribute(Attribute), Form(Form) { \
+ assert(V && "Expected valid value"); \
+ construct<const DIE##T *>(V); \
+ }
+#include "llvm/CodeGen/DIEValue.def"
+ // Accessors
+ Type getType() const { return Ty; }
+ dwarf::Attribute getAttribute() const { return Attribute; }
+ dwarf::Form getForm() const { return Form; }
+ explicit operator bool() const { return Ty; }
+ const DIE##T &getDIE##T() const { \
+ assert(getType() == is##T && "Expected " #T); \
+ return *get<DIE##T>(); \
+ }
+ const DIE##T &getDIE##T() const { \
+ assert(getType() == is##T && "Expected " #T); \
+ return **get<const DIE##T *>(); \
+ }
+#include "llvm/CodeGen/DIEValue.def"
+ /// EmitValue - Emit value via the Dwarf writer.
+ ///
+ void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ /// SizeOf - Return the size of a value in bytes.
+ ///
+ unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
#ifndef NDEBUG
- void printImpl(raw_ostream &O) const;
+ void print(raw_ostream &O) const;
+ void dump() const;
+/// DIE - A structured debug information entry. Has an abbreviation which
+/// describes its organization.
+class DIE {
+ /// Offset - Offset in debug info section.
+ ///
+ unsigned Offset;
+ /// Size - Size of instance + children.
+ ///
+ unsigned Size;
+ unsigned AbbrevNumber = ~0u;
+ /// Tag - Dwarf tag code.
+ ///
+ dwarf::Tag Tag = (dwarf::Tag)0;
+ /// Children DIEs.
+ ///
+ // This can't be a vector<DIE> because pointer validity is required for the
+ // Parent pointer and DIEEntry.
+ // It can't be a list<DIE> because some clients need pointer validity before
+ // the object has been added to any child list
+ // (eg: DwarfUnit::constructVariableDIE). These aren't insurmountable, but may
+ // be more convoluted than beneficial.
+ std::vector<std::unique_ptr<DIE>> Children;
+ DIE *Parent;
+ /// Attribute values.
+ ///
+ SmallVector<DIEValue, 12> Values;
+ DIE() : Offset(0), Size(0), Parent(nullptr) {}
+ explicit DIE(dwarf::Tag Tag)
+ : Offset(0), Size(0), Tag(Tag), Parent(nullptr) {}
+ // Accessors.
+ unsigned getAbbrevNumber() const { return AbbrevNumber; }
+ dwarf::Tag getTag() const { return Tag; }
+ unsigned getOffset() const { return Offset; }
+ unsigned getSize() const { return Size; }
+ bool hasChildren() const { return !Children.empty(); }
+ typedef std::vector<std::unique_ptr<DIE>>::const_iterator child_iterator;
+ typedef iterator_range<child_iterator> child_range;
+ child_range children() const {
+ return llvm::make_range(Children.begin(), Children.end());
+ }
+ typedef SmallVectorImpl<DIEValue>::const_iterator value_iterator;
+ typedef iterator_range<value_iterator> value_range;
+ value_iterator values_begin() const { return Values.begin(); }
+ value_iterator values_end() const { return Values.end(); }
+ value_range values() const {
+ return llvm::make_range(values_begin(), values_end());
+ }
+ void setValue(unsigned I, DIEValue New) {
+ assert(I < Values.size());
+ Values[I] = New;
+ }
+ DIE *getParent() const { return Parent; }
+ /// Generate the abbreviation for this DIE.
+ ///
+ /// Calculate the abbreviation for this, which should be uniqued and
+ /// eventually used to call \a setAbbrevNumber().
+ DIEAbbrev generateAbbrev() const;
+ /// Set the abbreviation number for this DIE.
+ void setAbbrevNumber(unsigned I) { AbbrevNumber = I; }
+ /// Climb up the parent chain to get the compile or type unit DIE this DIE
+ /// belongs to.
+ const DIE *getUnit() const;
+ /// Similar to getUnit, returns null when DIE is not added to an
+ /// owner yet.
+ const DIE *getUnitOrNull() const;
+ void setOffset(unsigned O) { Offset = O; }
+ void setSize(unsigned S) { Size = S; }
+ /// addValue - Add a value and attributes to a DIE.
+ ///
+ void addValue(DIEValue Value) { Values.push_back(Value); }
+ template <class T>
+ void addValue(dwarf::Attribute Attribute, dwarf::Form Form, T &&Value) {
+ Values.emplace_back(Attribute, Form, std::forward<T>(Value));
+ }
+ /// addChild - Add a child to the DIE.
+ ///
+ DIE &addChild(std::unique_ptr<DIE> Child) {
+ assert(!Child->getParent());
+ Child->Parent = this;
+ Children.push_back(std::move(Child));
+ return *Children.back();
+ }
+ /// Find a value in the DIE with the attribute given.
+ ///
+ /// Returns a default-constructed DIEValue (where \a DIEValue::getType()
+ /// gives \a DIEValue::isNone) if no such attribute exists.
+ DIEValue findAttribute(dwarf::Attribute Attribute) const;
+#ifndef NDEBUG
+ void print(raw_ostream &O, unsigned IndentCount = 0) const;
+ void dump();
/// DIELoc - Represents an expression location.
-class DIELoc : public DIEValue, public DIE {
- friend class DIEValue;
+class DIELoc : public DIE {
mutable unsigned Size; // Size in bytes excluding size header.
- DIELoc() : DIEValue(isLoc), Size(0) {}
+ DIELoc() : Size(0) {}
/// ComputeSize - Calculate the size of the location expression.
@@ -485,27 +594,22 @@ public:
return dwarf::DW_FORM_block;
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *E) { return E->getType() == isLoc; }
- void EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const;
- unsigned SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const;
+ void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
#ifndef NDEBUG
- void printImpl(raw_ostream &O) const;
+ void print(raw_ostream &O) const;
/// DIEBlock - Represents a block of values.
-class DIEBlock : public DIEValue, public DIE {
- friend class DIEValue;
+class DIEBlock : public DIE {
mutable unsigned Size; // Size in bytes excluding size header.
- DIEBlock() : DIEValue(isBlock), Size(0) {}
+ DIEBlock() : Size(0) {}
/// ComputeSize - Calculate the size of the location expression.
@@ -523,43 +627,11 @@ public:
return dwarf::DW_FORM_block;
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
- void EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const;
- unsigned SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const;
-#ifndef NDEBUG
- void printImpl(raw_ostream &O) const;
-/// DIELocList - Represents a pointer to a location list in the debug_loc
-/// section.
-class DIELocList : public DIEValue {
- friend class DIEValue;
- // Index into the .debug_loc vector.
- size_t Index;
- DIELocList(size_t I) : DIEValue(isLocList), Index(I) {}
- /// getValue - Grab the current index out.
- size_t getValue() const { return Index; }
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *E) { return E->getType() == isLocList; }
- void EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const;
- unsigned SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const;
+ void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
#ifndef NDEBUG
- void printImpl(raw_ostream &O) const;
+ void print(raw_ostream &O) const;
diff --git a/include/llvm/CodeGen/DIEValue.def b/include/llvm/CodeGen/DIEValue.def
new file mode 100644
index 0000000..2cfae7b
--- /dev/null
+++ b/include/llvm/CodeGen/DIEValue.def
@@ -0,0 +1,47 @@
+//===- llvm/CodeGen/DIEValue.def - DIEValue types ---------------*- C++ -*-===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// Macros for running through all types of DIEValue.
+#if !(defined HANDLE_DIEVALUE || defined HANDLE_DIEVALUE_SMALL || \
+#error "Missing macro definition of HANDLE_DIEVALUE"
+// Handler for all values.
+// Handler for small values.
+// Handler for large values.
diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h
index 357b2d8..e883bd1 100644
--- a/include/llvm/CodeGen/GCMetadata.h
+++ b/include/llvm/CodeGen/GCMetadata.h
@@ -121,7 +121,7 @@ public:
/// label just prior to the safe point (if the code generator is using
/// MachineModuleInfo).
void addSafePoint(GC::PointKind Kind, MCSymbol *Label, DebugLoc DL) {
- SafePoints.push_back(GCPoint(Kind, Label, DL));
+ SafePoints.emplace_back(Kind, Label, DL);
/// getFrameSize/setFrameSize - Records the function's frame size.
diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h
index de855f2..c97c636 100644
--- a/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/include/llvm/CodeGen/LiveRangeEdit.h
@@ -102,6 +102,10 @@ private:
/// registers are created.
void MRI_NoteNewVirtualRegister(unsigned VReg) override;
+ /// \brief Check if MachineOperand \p MO is a last use/kill either in the
+ /// main live range of \p LI or in one of the matching subregister ranges.
+ bool useIsKill(const LiveInterval &LI, const MachineOperand &MO) const;
/// Create a LiveRangeEdit for breaking down parent into smaller pieces.
/// @param parent The register being spilled or split.
diff --git a/include/llvm/CodeGen/MIRParser/MIRParser.h b/include/llvm/CodeGen/MIRParser/MIRParser.h
new file mode 100644
index 0000000..710b2d4
--- /dev/null
+++ b/include/llvm/CodeGen/MIRParser/MIRParser.h
@@ -0,0 +1,52 @@
+//===- MIRParser.h - MIR serialization format parser ----------------------===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This MIR serialization library is currently a work in progress. It can't
+// serialize machine functions at this time.
+// This file declares the functions that parse the MIR serialization format
+// files.
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <memory>
+namespace llvm {
+class SMDiagnostic;
+/// This function is the main interface to the MIR serialization format parser.
+/// It reads a YAML file that has an optional LLVM IR and returns an LLVM
+/// module.
+/// \param Filename - The name of the file to parse.
+/// \param Error - Error result info.
+/// \param Context - Context in which to allocate globals info.
+std::unique_ptr<Module> parseMIRFile(StringRef Filename, SMDiagnostic &Error,
+ LLVMContext &Context);
+/// This function is another interface to the MIR serialization format parser.
+/// It parses the optional LLVM IR in the given buffer, and returns an LLVM
+/// module.
+/// \param Contents - The MemoryBuffer containing the machine level IR.
+/// \param Error - Error result info.
+/// \param Context - Context in which to allocate globals info.
+std::unique_ptr<Module> parseMIR(std::unique_ptr<MemoryBuffer> Contents,
+ SMDiagnostic &Error, LLVMContext &Context);
+} // end namespace llvm
diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h
new file mode 100644
index 0000000..f9d4c74
--- /dev/null
+++ b/include/llvm/CodeGen/MIRYamlMapping.h
@@ -0,0 +1,40 @@
+//===- MIRYamlMapping.h - Describes the mapping between MIR and YAML ------===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// The MIR serialization library is currently a work in progress. It can't
+// serialize machine functions at this time.
+// This file implements the mapping between various MIR data structures and
+// their corresponding YAML representation.
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/YAMLTraits.h"
+namespace llvm {
+namespace yaml {
+struct MachineFunction {
+ StringRef Name;
+template <> struct MappingTraits<MachineFunction> {
+ static void mapping(IO &YamlIO, MachineFunction &MF) {
+ YamlIO.mapRequired("name", MF.Name);
+ }
+} // end namespace yaml
+} // end namespace llvm
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index 40f3b49..3889d471 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -256,11 +256,6 @@ class MachineFrameInfo {
/// Not null, if shrink-wrapping found a better place for the epilogue.
MachineBasicBlock *Restore;
- /// Check if it exists a path from \p MBB leading to the basic
- /// block with a SavePoint (a.k.a. prologue).
- bool isBeforeSavePoint(const MachineFunction &MF,
- const MachineBasicBlock &MBB) const;
explicit MachineFrameInfo(unsigned StackAlign, bool isStackRealign,
bool RealignOpt)
@@ -627,16 +622,15 @@ public:
MachineBasicBlock *getRestorePoint() const { return Restore; }
void setRestorePoint(MachineBasicBlock *NewRestore) { Restore = NewRestore; }
- /// getPristineRegs - Return a set of physical registers that are pristine on
- /// entry to the MBB.
+ /// Return a set of physical registers that are pristine.
/// Pristine registers hold a value that is useless to the current function,
- /// but that must be preserved - they are callee saved registers that have not
- /// been saved yet.
+ /// but that must be preserved - they are callee saved registers that are not
+ /// saved.
/// Before the PrologueEpilogueInserter has placed the CSR spill code, this
/// method always returns an empty set.
- BitVector getPristineRegs(const MachineBasicBlock *MBB) const;
+ BitVector getPristineRegs(const MachineFunction &MF) const;
/// print - Used by the MachineFunction printer to print information about
/// stack objects. Implemented in MachineFunction.cpp
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index e57257c..edda03f 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -331,6 +331,11 @@ public:
operands_begin() + getDesc().getNumDefs(), operands_end());
+ /// Returns the number of the operand iterator \p I points to.
+ unsigned getOperandNo(const_mop_iterator I) const {
+ return I - operands_begin();
+ }
/// Access to memory operands of the instruction
mmo_iterator memoperands_begin() const { return MemRefs; }
mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; }
@@ -483,6 +488,13 @@ public:
return hasProperty(MCID::NotDuplicable, Type);
+ /// Return true if this instruction is convergent.
+ /// Convergent instructions can only be moved to locations that are
+ /// control-equivalent to their initial position.
+ bool isConvergent(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::Convergent, Type);
+ }
/// Returns true if the specified instruction has a delay slot
/// which must be filled by the code generator.
bool hasDelaySlot(QueryType Type = AnyInBundle) const {
@@ -924,7 +936,7 @@ public:
/// For normal instructions, this is derived from the MCInstrDesc.
/// For inline assembly it is derived from the flag words.
- /// Returns NULL if the static register classs constraint cannot be
+ /// Returns NULL if the static register class constraint cannot be
/// determined.
const TargetRegisterClass*
@@ -936,10 +948,10 @@ public:
/// the given \p CurRC.
/// If \p ExploreBundle is set and MI is part of a bundle, all the
/// instructions inside the bundle will be taken into account. In other words,
- /// this method accumulates all the constrains of the operand of this MI and
+ /// this method accumulates all the constraints of the operand of this MI and
/// the related bundle if MI is a bundle or inside a bundle.
- /// Returns the register class that statisfies both \p CurRC and the
+ /// Returns the register class that satisfies both \p CurRC and the
/// constraints set by MI. Returns NULL if such a register class does not
/// exist.
@@ -952,7 +964,7 @@ public:
/// \brief Applies the constraints (def/use) implied by the \p OpIdx operand
/// to the given \p CurRC.
- /// Returns the register class that statisfies both \p CurRC and the
+ /// Returns the register class that satisfies both \p CurRC and the
/// constraints set by \p OpIdx MI. Returns NULL if such a register class
/// does not exist.
diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h
index f7bcf45..438ef2e 100644
--- a/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/include/llvm/CodeGen/MachineLoopInfo.h
@@ -114,7 +114,7 @@ public:
// isLoopHeader - True if the block is a loop header node
- inline bool isLoopHeader(MachineBasicBlock *BB) const {
+ inline bool isLoopHeader(const MachineBasicBlock *BB) const {
return LI.isLoopHeader(BB);
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 39f6954..9c7e7b4 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -17,11 +17,11 @@
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
+#include <functional>
#include <string>
namespace llvm {
-class FunctionPass;
class MachineFunctionPass;
class PassConfigImpl;
class PassInfo;
@@ -374,6 +374,10 @@ namespace llvm {
createMachineFunctionPrinterPass(raw_ostream &OS,
const std::string &Banner ="");
+ /// MIRPrinting pass - this pass prints out the LLVM IR into the given stream
+ /// using the MIR serialization format.
+ MachineFunctionPass *createPrintMIRPass(raw_ostream &OS);
/// createCodeGenPreparePass - Transform the code to expose more pattern
/// matching during instruction selection.
FunctionPass *createCodeGenPreparePass(const TargetMachine *TM = nullptr);
@@ -488,6 +492,10 @@ namespace llvm {
/// MachineFunctionPrinterPass - This pass prints out MachineInstr's.
extern char &MachineFunctionPrinterPassID;
+ /// MIRPrintingPass - this pass prints out the LLVM IR using the MIR
+ /// serialization format.
+ extern char &MIRPrintingPassID;
/// TailDuplicate - Duplicate blocks with unconditional branches
/// into tails of their predecessors.
extern char &TailDuplicateID;
@@ -511,6 +519,8 @@ namespace llvm {
/// IfConverter - This pass performs machine code if conversion.
extern char &IfConverterID;
+ FunctionPass *createIfConverter(std::function<bool(const Function &)> Ftor);
/// MachineBlockPlacement - This pass places basic blocks based on branch
/// probabilities.
extern char &MachineBlockPlacementID;
@@ -605,6 +615,9 @@ namespace llvm {
/// UnpackMachineBundles - This pass unpack machine instruction bundles.
extern char &UnpackMachineBundlesID;
+ FunctionPass *
+ createUnpackMachineBundles(std::function<bool(const Function &)> Ftor);
/// FinalizeMachineBundles - This pass finalize machine instruction
/// bundles (created earlier, e.g. during pre-RA scheduling).
extern char &FinalizeMachineBundlesID;
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 1196783..b56d5ec 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -260,7 +260,7 @@ namespace llvm {
#ifndef NDEBUG
const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0];
- SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
+ SUnits.emplace_back(MI, (unsigned)SUnits.size());
assert((Addr == nullptr || Addr == &SUnits[0]) &&
"SUnits std::vector reallocated on the fly!");
SUnits.back().OrigNode = &SUnits.back();
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 89f9005..78fdd04 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -878,6 +878,10 @@ public:
/// Return an MDNodeSDNode which holds an MDNode.
SDValue getMDNode(const MDNode *MD);
+ /// Return a bitcast using the SDLoc of the value operand, and casting to the
+ /// provided type. Use getNode to set a custom SDLoc.
+ SDValue getBitcast(EVT VT, SDValue V);
/// Return an AddrSpaceCastSDNode.
SDValue getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
unsigned SrcAS, unsigned DestAS);
diff --git a/include/llvm/CodeGen/WinEHFuncInfo.h b/include/llvm/CodeGen/WinEHFuncInfo.h
index e2644ed..1cff320 100644
--- a/include/llvm/CodeGen/WinEHFuncInfo.h
+++ b/include/llvm/CodeGen/WinEHFuncInfo.h
@@ -23,6 +23,7 @@ class BasicBlock;
class Constant;
class Function;
class GlobalVariable;
+class InvokeInst;
class IntrinsicInst;
class LandingPadInst;
class MCSymbol;
@@ -153,5 +154,11 @@ struct WinEHFuncInfo {
NumIPToStateFuncsVisited(0) {}
+/// Analyze the IR in ParentFn and its handlers to build WinEHFuncInfo, which
+/// describes the state numbers and tables used by __CxxFrameHandler3. This
+/// analysis assumes that WinEHPrepare has already been run.
+void calculateWinCXXEHStateNumbers(const Function *ParentFn,
+ WinEHFuncInfo &FuncInfo);
diff --git a/include/llvm/Config/ b/include/llvm/Config/
index 67d7c84..211e1d0 100644
--- a/include/llvm/Config/
+++ b/include/llvm/Config/
@@ -6,6 +6,9 @@
/* Bug report URL. */
+/* Default OpenMP runtime used by -fopenmp. */
/* Define if we have libxml2 */
diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h
index dddc7fa..871e60c 100644
--- a/include/llvm/DebugInfo/DIContext.h
+++ b/include/llvm/DebugInfo/DIContext.h
@@ -141,8 +141,7 @@ private:
/// on the fly.
class LoadedObjectInfo {
- LoadedObjectInfo() {}
- virtual ~LoadedObjectInfo() {}
+ virtual ~LoadedObjectInfo() = default;
/// Obtain the Load Address of a section by Name.
@@ -170,7 +169,7 @@ public:
/// Obtain a copy of this LoadedObjectInfo.
/// The caller is responsible for deallocation once the copy is no longer required.
- virtual LoadedObjectInfo *clone() const = 0;
+ virtual std::unique_ptr<LoadedObjectInfo> clone() const = 0;
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 37b22c2..93e7c79 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -38,12 +38,12 @@ public:
// The size in bytes of the statement information for this compilation unit
// (not including the total_length field itself).
- uint32_t TotalLength;
+ uint64_t TotalLength;
// Version identifier for the statement information format.
uint16_t Version;
// The number of bytes following the prologue_length field to the beginning
// of the first byte of the statement program itself.
- uint32_t PrologueLength;
+ uint64_t PrologueLength;
// The size in bytes of the smallest target machine instruction. Statement
// program opcodes that alter the address register first multiply their
// operands by this value.
@@ -63,14 +63,22 @@ public:
std::vector<const char*> IncludeDirectories;
std::vector<FileNameEntry> FileNames;
+ bool IsDWARF64;
+ uint32_t sizeofTotalLength() const {
+ return IsDWARF64 ? 12 : 4;
+ }
+ uint32_t sizeofPrologueLength() const {
+ return IsDWARF64 ? 8 : 4;
+ }
// Length of the prologue in bytes.
uint32_t getLength() const {
- return PrologueLength + sizeof(TotalLength) + sizeof(Version) +
- sizeof(PrologueLength);
+ return PrologueLength + sizeofTotalLength() + sizeof(Version) +
+ sizeofPrologueLength();
// Length of the line table data in bytes (not including the prologue).
uint32_t getStatementTableLength() const {
- return TotalLength + sizeof(TotalLength) - getLength();
+ return TotalLength + sizeofTotalLength() - getLength();
int32_t getMaxLineIncrementForSpecialOpcode() const {
return LineBase + (int8_t)LineRange - 1;
@@ -163,6 +171,9 @@ public:
struct LineTable {
+ // Represents an invalid row
+ const uint32_t UnknownRowIndex = UINT32_MAX;
void appendRow(const DWARFDebugLine::Row &R) {
@@ -171,7 +182,7 @@ public:
// Returns the index of the row with file/line info for a given address,
- // or -1 if there is no such row.
+ // or UnknownRowIndex if there is no such row.
uint32_t lookupAddress(uint64_t address) const;
bool lookupAddressRange(uint64_t address, uint64_t size,
@@ -203,6 +214,10 @@ public:
typedef SequenceVector::const_iterator SequenceIter;
RowVector Rows;
SequenceVector Sequences;
+ private:
+ uint32_t findRowInSeq(const DWARFDebugLine::Sequence &seq,
+ uint64_t address) const;
const LineTable *getLineTable(uint32_t offset) const;
diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index 719adbc..074d55e 100644
--- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -390,8 +390,7 @@ public:
for (auto &F : *M) {
if (F.isDeclaration())
- Partitioning.push_back(std::vector<Function*>());
- Partitioning.back().push_back(&F);
+ Partitioning.emplace_back(1, &F);
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index ac0151a..94c4038 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -15,6 +15,7 @@
#include "JITSymbolFlags.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Memory.h"
#include "llvm/DebugInfo/DIContext.h"
@@ -62,8 +63,6 @@ public:
unsigned EndIdx)
: RTDyld(RTDyld), BeginIdx(BeginIdx), EndIdx(EndIdx) { }
- virtual ~LoadedObjectInfo() = default;
virtual object::OwningBinary<object::ObjectFile>
getObjectForDebug(const object::ObjectFile &Obj) const = 0;
@@ -80,8 +79,8 @@ public:
LoadedObjectInfoHelper(RuntimeDyldImpl &RTDyld, unsigned BeginIdx,
unsigned EndIdx)
: LoadedObjectInfo(RTDyld, BeginIdx, EndIdx) {}
- llvm::LoadedObjectInfo *clone() const override {
- return new Derived(static_cast<const Derived &>(*this));
+ std::unique_ptr<llvm::LoadedObjectInfo> clone() const override {
+ return llvm::make_unique<Derived>(static_cast<const Derived &>(*this));
diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h
index 15942f1..08b5102 100644
--- a/include/llvm/IR/InlineAsm.h
+++ b/include/llvm/IR/InlineAsm.h
@@ -248,6 +248,13 @@ public:
+ Constraint_Um,
+ Constraint_Un,
+ Constraint_Uq,
+ Constraint_Us,
+ Constraint_Ut,
+ Constraint_Uv,
+ Constraint_Uy,
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index 9f5e244..8d8c530 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -810,6 +810,7 @@ inline Type *checkGEPType(Type *Ty) {
class GetElementPtrInst : public Instruction {
Type *SourceElementType;
+ Type *ResultElementType;
GetElementPtrInst(const GetElementPtrInst &GEPI);
void init(Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr);
@@ -903,9 +904,12 @@ public:
Type *getSourceElementType() const { return SourceElementType; }
void setSourceElementType(Type *Ty) { SourceElementType = Ty; }
+ void setResultElementType(Type *Ty) { ResultElementType = Ty; }
Type *getResultElementType() const {
- return cast<PointerType>(getType()->getScalarType())->getElementType();
+ assert(ResultElementType ==
+ cast<PointerType>(getType()->getScalarType())->getElementType());
+ return ResultElementType;
/// \brief Returns the address space of this instruction's pointer type.
@@ -1028,7 +1032,10 @@ GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr,
: Instruction(getGEPReturnType(PointeeType, Ptr, IdxList), GetElementPtr,
OperandTraits<GetElementPtrInst>::op_end(this) - Values,
Values, InsertBefore),
- SourceElementType(PointeeType) {
+ SourceElementType(PointeeType),
+ ResultElementType(getIndexedType(PointeeType, IdxList)) {
+ assert(ResultElementType ==
+ cast<PointerType>(getType()->getScalarType())->getElementType());
init(Ptr, IdxList, NameStr);
GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr,
@@ -1038,7 +1045,10 @@ GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr,
: Instruction(getGEPReturnType(PointeeType, Ptr, IdxList), GetElementPtr,
OperandTraits<GetElementPtrInst>::op_end(this) - Values,
Values, InsertAtEnd),
- SourceElementType(PointeeType) {
+ SourceElementType(PointeeType),
+ ResultElementType(getIndexedType(PointeeType, IdxList)) {
+ assert(ResultElementType ==
+ cast<PointerType>(getType()->getScalarType())->getElementType());
init(Ptr, IdxList, NameStr);
diff --git a/include/llvm/IR/ b/include/llvm/IR/
index 8f6cdeb..beeffde 100644
--- a/include/llvm/IR/
+++ b/include/llvm/IR/
@@ -537,7 +537,8 @@ def int_experimental_patchpoint_i64 : Intrinsic<[llvm_i64_ty],
def int_experimental_gc_statepoint : Intrinsic<[llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty,
llvm_anyptr_ty, llvm_i32_ty,
- llvm_i32_ty, llvm_vararg_ty]>;
+ llvm_i32_ty, llvm_vararg_ty],
+ [Throws]>;
def int_experimental_gc_result : Intrinsic<[llvm_any_ty], [llvm_i32_ty]>;
def int_experimental_gc_relocate : Intrinsic<[llvm_anyptr_ty],
diff --git a/include/llvm/IR/ b/include/llvm/IR/
index 3a8a4a6..0826aa2 100644
--- a/include/llvm/IR/
+++ b/include/llvm/IR/
@@ -3372,10 +3372,40 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
- def int_x86_avx512_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512_mask">,
+ def int_x86_avx512_mask_sqrt_pd_128 : GCCBuiltin<"__builtin_ia32_sqrtpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
+ llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_sqrt_ps_128 : GCCBuiltin<"__builtin_ia32_sqrtps128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
+ llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_getexp_pd_128 : GCCBuiltin<"__builtin_ia32_getexppd128_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_getexp_pd_256 : GCCBuiltin<"__builtin_ia32_getexppd256_mask">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_getexp_pd_512 : GCCBuiltin<"__builtin_ia32_getexppd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">,
+ def int_x86_avx512_mask_getexp_ps_128 : GCCBuiltin<"__builtin_ia32_getexpps128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_getexp_ps_256 : GCCBuiltin<"__builtin_ia32_getexpps256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_getexp_ps_512 : GCCBuiltin<"__builtin_ia32_getexpps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h
index ba14457..ceb1c73 100644
--- a/include/llvm/IR/MDBuilder.h
+++ b/include/llvm/IR/MDBuilder.h
@@ -153,7 +153,7 @@ public:
/// \brief Return metadata for a TBAA tag node with the given
/// base type, access type and offset relative to the base type.
MDNode *createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType,
- uint64_t Offset);
+ uint64_t Offset, bool IsConstant = false);
} // end namespace llvm
diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h
index 3bf2943..19a1d6c 100644
--- a/include/llvm/IR/Value.h
+++ b/include/llvm/IR/Value.h
@@ -69,9 +69,8 @@ class Value {
Type *VTy;
Use *UseList;
- friend class ValueAsMetadata; // Allow access to NameAndIsUsedByMD.
+ friend class ValueAsMetadata; // Allow access to IsUsedByMD.
friend class ValueHandleBase;
- PointerIntPair<ValueName *, 1> NameAndIsUsedByMD;
const unsigned char SubclassID; // Subclass identifier (for isa/dyn_cast)
unsigned char HasValueHandle : 1; // Has a ValueHandle pointing to this?
@@ -101,7 +100,10 @@ protected:
/// This is stored here to save space in User on 64-bit hosts. Since most
/// instances of Value have operands, 32-bit hosts aren't significantly
/// affected.
- unsigned NumOperands;
+ unsigned NumOperands : 30;
+ bool IsUsedByMD : 1;
+ bool HasName : 1;
template <typename UseT> // UseT == 'Use' or 'const Use'
@@ -210,9 +212,9 @@ public:
LLVMContext &getContext() const;
// \brief All values can potentially be named.
- bool hasName() const { return getValueName() != nullptr; }
- ValueName *getValueName() const { return NameAndIsUsedByMD.getPointer(); }
- void setValueName(ValueName *VN) { NameAndIsUsedByMD.setPointer(VN); }
+ bool hasName() const { return HasName; }
+ ValueName *getValueName() const;
+ void setValueName(ValueName *VN);
void destroyValueName();
@@ -394,7 +396,7 @@ public:
bool hasValueHandle() const { return HasValueHandle; }
/// \brief Return true if there is metadata referencing this value.
- bool isUsedByMetadata() const { return NameAndIsUsedByMD.getInt(); }
+ bool isUsedByMetadata() const { return IsUsedByMD; }
/// \brief Strip off pointer casts, all-zero GEPs, and aliases.
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 497ac55..4f95c88 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -289,6 +289,7 @@ void initializeLoopVectorizePass(PassRegistry&);
void initializeSLPVectorizerPass(PassRegistry&);
void initializeBBVectorizePass(PassRegistry&);
void initializeMachineFunctionPrinterPassPass(PassRegistry&);
+void initializeMIRPrintingPassPass(PassRegistry&);
void initializeStackMapLivenessPass(PassRegistry&);
void initializeMachineCombinerPass(PassRegistry &);
void initializeLoadCombinePass(PassRegistry&);
diff --git a/include/llvm/LTO/LTOCodeGenerator.h b/include/llvm/LTO/LTOCodeGenerator.h
index 3b4be81..0c46fc0 100644
--- a/include/llvm/LTO/LTOCodeGenerator.h
+++ b/include/llvm/LTO/LTOCodeGenerator.h
@@ -82,7 +82,7 @@ struct LTOCodeGenerator {
void setShouldInternalize(bool Value) { ShouldInternalize = Value; }
void setShouldEmbedUselists(bool Value) { ShouldEmbedUselists = Value; }
- void addMustPreserveSymbol(const char *sym) { MustPreserveSymbols[sym] = 1; }
+ void addMustPreserveSymbol(StringRef sym) { MustPreserveSymbols[sym] = 1; }
// To pass options to the driver and optimization passes. These options are
// not necessarily for debugging purpose (The function name is misleading).
@@ -117,11 +117,10 @@ struct LTOCodeGenerator {
// (linker), it brings the object to a buffer, and return the buffer to the
// caller. This function should delete intermediate object file once its content
// is brought to memory. Return NULL if the compilation was not successful.
- const void *compile(size_t *length,
- bool disableInline,
- bool disableGVNLoadPRE,
- bool disableVectorization,
- std::string &errMsg);
+ std::unique_ptr<MemoryBuffer> compile(bool disableInline,
+ bool disableGVNLoadPRE,
+ bool disableVectorization,
+ std::string &errMsg);
// Optimizes the merged module. Returns true on success.
bool optimize(bool disableInline,
@@ -132,7 +131,7 @@ struct LTOCodeGenerator {
// Compiles the merged optimized module into a single object file. It brings
// the object to a buffer, and returns the buffer to the caller. Return NULL
// if the compilation was not successful.
- const void *compileOptimized(size_t *length, std::string &errMsg);
+ std::unique_ptr<MemoryBuffer> compileOptimized(std::string &errMsg);
void setDiagnosticHandler(lto_diagnostic_handler_t, void *);
@@ -166,7 +165,6 @@ private:
lto_codegen_model CodeModel = LTO_CODEGEN_PIC_MODEL_DEFAULT;
StringSet MustPreserveSymbols;
StringSet AsmUndefinedRefs;
- std::unique_ptr<MemoryBuffer> NativeObjectFile;
std::vector<char *> CodegenOptions;
std::string MCpu;
std::string MAttr;
diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h
index c0a95d4..2bfad2d 100644
--- a/include/llvm/MC/MCAsmBackend.h
+++ b/include/llvm/MC/MCAsmBackend.h
@@ -97,6 +97,12 @@ public:
/// Target specific predicate for whether a given fixup requires the
/// associated instruction to be relaxed.
+ virtual bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const;
+ /// Simple predicate for targets where !Resolved implies requiring relaxation
virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const = 0;
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 0335f31..9bb0fa6 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -155,6 +155,10 @@ protected:
/// Defaults to false.
bool AllowAtInName;
+ /// If this is true, symbol names with invalid characters will be printed in
+ /// quotes.
+ bool SupportsQuotedNames;
/// This is true if data region markers should be printed as
/// ".data_region/.end_data_region" directives. If false, use "$d/$a" labels
/// instead.
@@ -406,6 +410,10 @@ public:
unsigned Encoding,
MCStreamer &Streamer) const;
+ /// Return true if the identifier \p Name does not need quotes to be
+ /// syntactically correct.
+ virtual bool isValidUnquotedName(StringRef Name) const;
bool usesSunStyleELFSectionSwitchSyntax() const {
return SunStyleELFSectionSwitchSyntax;
@@ -456,6 +464,7 @@ public:
const char *getCode64Directive() const { return Code64Directive; }
unsigned getAssemblerDialect() const { return AssemblerDialect; }
bool doesAllowAtInName() const { return AllowAtInName; }
+ bool supportsNameQuoting() const { return SupportsQuotedNames; }
bool doesSupportDataRegionDirectives() const {
return UseDataRegionDirectives;
diff --git a/include/llvm/MC/MCAsmLayout.h b/include/llvm/MC/MCAsmLayout.h
index fb28420..1b20d5b 100644
--- a/include/llvm/MC/MCAsmLayout.h
+++ b/include/llvm/MC/MCAsmLayout.h
@@ -18,7 +18,6 @@ class MCAssembler;
class MCFragment;
class MCSection;
class MCSymbol;
-class MCSymbolData;
/// Encapsulates the layout of an assembly file at a particular point in time.
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 593504c..a6178c2 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -12,7 +12,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/ilist.h"
@@ -24,7 +23,6 @@
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DataTypes.h"
#include <algorithm>
@@ -60,7 +58,8 @@ public:
+ FT_SafeSEH
@@ -531,6 +530,28 @@ public:
+class MCSafeSEHFragment : public MCFragment {
+ virtual void anchor();
+ const MCSymbol *Sym;
+ MCSafeSEHFragment(const MCSymbol *Sym, MCSection *Sec = nullptr)
+ : MCFragment(FT_SafeSEH, Sec), Sym(Sym) {}
+ /// \name Accessors
+ /// @{
+ const MCSymbol *getSymbol() { return Sym; }
+ const MCSymbol *getSymbol() const { return Sym; }
+ /// @}
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_SafeSEH;
+ }
// FIXME: This really doesn't belong here. See comments below.
struct IndirectSymbolData {
MCSymbol *Symbol;
@@ -551,7 +572,7 @@ class MCAssembler {
friend class MCAsmLayout;
- typedef SetVector<MCSection *> SectionListType;
+ typedef std::vector<MCSection *> SectionListType;
typedef std::vector<const MCSymbol *> SymbolDataListType;
typedef pointee_iterator<SectionListType::const_iterator> const_iterator;
@@ -564,9 +585,6 @@ public:
typedef iterator_range<symbol_iterator> symbol_range;
typedef iterator_range<const_symbol_iterator> const_symbol_range;
- typedef std::vector<std::string> FileNameVectorType;
- typedef FileNameVectorType::const_iterator const_file_name_iterator;
typedef std::vector<IndirectSymbolData>::const_iterator
typedef std::vector<IndirectSymbolData>::iterator indirect_symbol_iterator;
@@ -613,7 +631,7 @@ private:
std::vector<std::vector<std::string>> LinkerOptions;
/// List of declared file names
- FileNameVectorType FileNames;
+ std::vector<std::string> FileNames;
/// The set of function symbols for which a .thumb_func directive has
/// been seen.
@@ -883,39 +901,21 @@ public:
/// \name Backend Data Access
/// @{
- bool registerSection(MCSection &Section) { return Sections.insert(&Section); }
- bool hasSymbolData(const MCSymbol &Symbol) const { return Symbol.hasData(); }
- MCSymbolData &getSymbolData(const MCSymbol &Symbol) {
- return const_cast<MCSymbolData &>(
- static_cast<const MCAssembler &>(*this).getSymbolData(Symbol));
- }
- const MCSymbolData &getSymbolData(const MCSymbol &Symbol) const {
- return Symbol.getData();
- }
- MCSymbolData &getOrCreateSymbolData(const MCSymbol &Symbol,
- bool *Created = nullptr) {
- if (Created)
- *Created = !hasSymbolData(Symbol);
- if (!hasSymbolData(Symbol)) {
- Symbol.initializeData();
- Symbols.push_back(&Symbol);
- }
- return Symbol.getData();
+ bool registerSection(MCSection &Section) {
+ if (Section.isRegistered())
+ return false;
+ Sections.push_back(&Section);
+ Section.setIsRegistered(true);
+ return true;
- const_file_name_iterator file_names_begin() const {
- return FileNames.begin();
- }
+ void registerSymbol(const MCSymbol &Symbol, bool *Created = nullptr);
- const_file_name_iterator file_names_end() const { return FileNames.end(); }
+ ArrayRef<std::string> getFileNames() { return FileNames; }
void addFileName(StringRef FileName) {
- if (std::find(file_names_begin(), file_names_end(), FileName) ==
- file_names_end())
+ if (std::find(FileNames.begin(), FileNames.end(), FileName) ==
+ FileNames.end())
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 5b57b9d..1790905 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -30,6 +30,7 @@ namespace llvm {
class MCExpr;
class MCSection;
class MCSymbol;
+ class MCSymbolELF;
class MCLabel;
struct MCDwarfFile;
class MCDwarfLoc;
@@ -75,7 +76,7 @@ namespace llvm {
/// ELF sections can have a corresponding symbol. This maps one to the
/// other.
- DenseMap<const MCSectionELF *, MCSymbol *> SectionSymbols;
+ DenseMap<const MCSectionELF *, MCSymbolELF *> SectionSymbols;
/// A mapping from a local label number and an instance count to a symbol.
/// For example, in the assembly
@@ -205,7 +206,10 @@ namespace llvm {
/// Do automatic reset in destructor
bool AutoReset;
- MCSymbol *CreateSymbol(StringRef Name, bool AlwaysAddSuffix);
+ MCSymbol *createSymbolImpl(const StringMapEntry<bool> *Name,
+ bool IsTemporary);
+ MCSymbol *createSymbol(StringRef Name, bool AlwaysAddSuffix,
+ bool IsTemporary);
MCSymbol *getOrCreateDirectionalLocalSymbol(unsigned LocalLabelVal,
unsigned Instance);
@@ -263,7 +267,7 @@ namespace llvm {
/// \param Name - The symbol name, which must be unique across all symbols.
MCSymbol *getOrCreateSymbol(const Twine &Name);
- MCSymbol *getOrCreateSectionSymbol(const MCSectionELF &Section);
+ MCSymbolELF *getOrCreateSectionSymbol(const MCSectionELF &Section);
/// Gets a symbol that will be defined to the final stack offset of a local
/// variable after codegen.
@@ -340,18 +344,18 @@ namespace llvm {
MCSectionELF *getELFSection(StringRef Section, unsigned Type,
unsigned Flags, unsigned EntrySize,
- const MCSymbol *Group, unsigned UniqueID,
+ const MCSymbolELF *Group, unsigned UniqueID,
const char *BeginSymName,
const MCSectionELF *Associated);
MCSectionELF *createELFRelSection(StringRef Name, unsigned Type,
unsigned Flags, unsigned EntrySize,
- const MCSymbol *Group,
+ const MCSymbolELF *Group,
const MCSectionELF *Associated);
void renameELFSection(MCSectionELF *Section, StringRef Name);
- MCSectionELF *createELFGroupSection(const MCSymbol *Group);
+ MCSectionELF *createELFGroupSection(const MCSymbolELF *Group);
MCSectionCOFF *getCOFFSection(StringRef Section, unsigned Characteristics,
SectionKind Kind, StringRef COMDATSymName,
diff --git a/include/llvm/MC/MCELF.h b/include/llvm/MC/MCELF.h
deleted file mode 100644
index f409988..0000000
--- a/include/llvm/MC/MCELF.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//===- lib/MC/MCELF.h - ELF MC --------------------------------------------===//
-// The LLVM Compiler Infrastructure
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// This file contains some support functions used by the ELF Streamer and
-// ObjectWriter.
-#ifndef LLVM_MC_MCELF_H
-#define LLVM_MC_MCELF_H
-namespace llvm {
-class MCSymbolData;
-class MCELF {
- public:
- static void SetBinding(MCSymbolData &SD, unsigned Binding);
- static unsigned GetBinding(const MCSymbolData &SD);
- static void SetType(MCSymbolData &SD, unsigned Type);
- static unsigned GetType(const MCSymbolData &SD);
- static void SetVisibility(MCSymbolData &SD, unsigned Visibility);
- static unsigned GetVisibility(const MCSymbolData &SD);
- static void setOther(MCSymbolData &SD, unsigned Other);
- static unsigned getOther(const MCSymbolData &SD);
diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h
index cf73eca..01f694d 100644
--- a/include/llvm/MC/MCELFObjectWriter.h
+++ b/include/llvm/MC/MCELFObjectWriter.h
@@ -21,17 +21,17 @@ class MCFixup;
class MCFragment;
class MCObjectWriter;
class MCSymbol;
-class MCSymbolData;
+class MCSymbolELF;
class MCValue;
class raw_pwrite_stream;
struct ELFRelocationEntry {
uint64_t Offset; // Where is the relocation.
- const MCSymbol *Symbol; // The symbol to relocate with.
+ const MCSymbolELF *Symbol; // The symbol to relocate with.
unsigned Type; // The type of the relocation.
uint64_t Addend; // The addend to use.
- ELFRelocationEntry(uint64_t Offset, const MCSymbol *Symbol, unsigned Type,
+ ELFRelocationEntry(uint64_t Offset, const MCSymbolELF *Symbol, unsigned Type,
uint64_t Addend)
: Offset(Offset), Symbol(Symbol), Type(Type), Addend(Addend) {}
@@ -69,7 +69,7 @@ public:
virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const = 0;
- virtual bool needsRelocateWithSymbol(const MCSymbolData &SD,
+ virtual bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const;
virtual void sortRelocs(const MCAssembler &Asm,
diff --git a/include/llvm/MC/MCELFStreamer.h b/include/llvm/MC/MCELFStreamer.h
index 97058f5..241db0dc 100644
--- a/include/llvm/MC/MCELFStreamer.h
+++ b/include/llvm/MC/MCELFStreamer.h
@@ -23,8 +23,6 @@ class MCAssembler;
class MCCodeEmitter;
class MCExpr;
class MCInst;
-class MCSymbol;
-class MCSymbolData;
class raw_ostream;
class MCELFStreamer : public MCObjectStreamer {
@@ -39,7 +37,6 @@ public:
void reset() override {
SeenIdent = false;
- BindingExplicitlySet.clear();
@@ -62,7 +59,7 @@ public:
void EmitCOFFSymbolType(int Type) override;
void EndCOFFSymbolDef() override;
- void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) override;
+ void emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) override;
void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
@@ -108,8 +105,6 @@ private:
std::vector<LocalCommon> LocalCommons;
- SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
/// BundleGroups - The stack of fragments holding the bundle-locked
/// instructions.
llvm::SmallVector<MCDataFragment *, 4> BundleGroups;
diff --git a/include/llvm/MC/MCELFSymbolFlags.h b/include/llvm/MC/MCELFSymbolFlags.h
deleted file mode 100644
index 297c442..0000000
--- a/include/llvm/MC/MCELFSymbolFlags.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===- MCELFSymbolFlags.h - ELF Symbol Flags ----------------*- C++ -*-===//
-// The LLVM Compiler Infrastructure
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// This file declares the SymbolFlags used for the ELF target.
-#include "llvm/Support/ELF.h"
-// Because all the symbol flags need to be stored in the MCSymbolData
-// 'flags' variable we need to provide shift constants per flag type.
-namespace llvm {
- enum {
- ELF_STT_Shift = 0, // Shift value for STT_* flags.
- ELF_STB_Shift = 4, // Shift value for STB_* flags.
- ELF_STV_Shift = 8, // Shift value for STV_* flags.
- ELF_STO_Shift = 10 // Shift value for STO_* flags.
- };
- enum ELFSymbolFlags {
- ELF_STB_Local = (ELF::STB_LOCAL << ELF_STB_Shift),
- ELF_STB_Global = (ELF::STB_GLOBAL << ELF_STB_Shift),
- ELF_STB_Weak = (ELF::STB_WEAK << ELF_STB_Shift),
- ELF_STB_Loproc = (ELF::STB_LOPROC << ELF_STB_Shift),
- ELF_STB_Hiproc = (ELF::STB_HIPROC << ELF_STB_Shift),
- ELF_STT_Notype = (ELF::STT_NOTYPE << ELF_STT_Shift),
- ELF_STT_Object = (ELF::STT_OBJECT << ELF_STT_Shift),
- ELF_STT_Func = (ELF::STT_FUNC << ELF_STT_Shift),
- ELF_STT_Section = (ELF::STT_SECTION << ELF_STT_Shift),
- ELF_STT_File = (ELF::STT_FILE << ELF_STT_Shift),
- ELF_STT_Common = (ELF::STT_COMMON << ELF_STT_Shift),
- ELF_STT_Tls = (ELF::STT_TLS << ELF_STT_Shift),
- ELF_STT_Loproc = (ELF::STT_LOPROC << ELF_STT_Shift),
- ELF_STT_Hiproc = (ELF::STT_HIPROC << ELF_STT_Shift),
- ELF_STV_Default = (ELF::STV_DEFAULT << ELF_STV_Shift),
- ELF_STV_Internal = (ELF::STV_INTERNAL << ELF_STV_Shift),
- ELF_STV_Hidden = (ELF::STV_HIDDEN << ELF_STV_Shift),
- ELF_STV_Protected = (ELF::STV_PROTECTED << ELF_STV_Shift)
- };
-} // end namespace llvm
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index b38ad7d..b3a6073 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -46,7 +46,7 @@ private:
MCExpr(const MCExpr&) = delete;
void operator=(const MCExpr&) = delete;
- bool EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
+ bool evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
const MCAsmLayout *Layout,
const SectionAddrMap *Addrs) const;
@@ -57,7 +57,7 @@ private:
explicit MCExpr(ExprKind Kind) : Kind(Kind) {}
- bool EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
+ bool evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCAsmLayout *Layout,
const MCFixup *Fixup,
const SectionAddrMap *Addrs, bool InSet) const;
@@ -72,7 +72,7 @@ public:
/// \name Utility Methods
/// @{
- void print(raw_ostream &OS) const;
+ void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
void dump() const;
/// @}
@@ -86,11 +86,11 @@ public:
/// values. If not given, then only non-symbolic expressions will be
/// evaluated.
/// \return - True on success.
- bool EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout,
+ bool evaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout,
const SectionAddrMap &Addrs) const;
- bool EvaluateAsAbsolute(int64_t &Res) const;
- bool EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const;
- bool EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout) const;
+ bool evaluateAsAbsolute(int64_t &Res) const;
+ bool evaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const;
+ bool evaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout) const;
bool evaluateKnownAbsolute(int64_t &Res, const MCAsmLayout &Layout) const;
@@ -101,13 +101,13 @@ public:
/// \param Layout - The assembler layout object to use for evaluating values.
/// \param Fixup - The Fixup object if available.
/// \return - True on success.
- bool EvaluateAsRelocatable(MCValue &Res, const MCAsmLayout *Layout,
+ bool evaluateAsRelocatable(MCValue &Res, const MCAsmLayout *Layout,
const MCFixup *Fixup) const;
/// \brief Try to evaluate the expression to the form (a - b + constant) where
/// neither a nor b are variables.
- /// This is a more aggressive variant of EvaluateAsRelocatable. The intended
+ /// This is a more aggressive variant of evaluateAsRelocatable. The intended
/// use is for when relocations are not available, like the .size directive.
bool evaluateAsValue(MCValue &Res, const MCAsmLayout &Layout) const;
@@ -115,13 +115,13 @@ public:
/// currently defined as the absolute section for constants, or
/// otherwise the section associated with the first defined symbol in the
/// expression.
- MCSection *FindAssociatedSection() const;
+ MCSection *findAssociatedSection() const;
/// @}
inline raw_ostream &operator<<(raw_ostream &OS, const MCExpr &E) {
- E.print(OS);
+ E.print(OS, nullptr);
return OS;
@@ -136,7 +136,7 @@ public:
/// \name Construction
/// @{
- static const MCConstantExpr *Create(int64_t Value, MCContext &Ctx);
+ static const MCConstantExpr *create(int64_t Value, MCContext &Ctx);
/// @}
/// \name Accessors
@@ -312,13 +312,13 @@ public:
/// \name Construction
/// @{
- static const MCSymbolRefExpr *Create(const MCSymbol *Symbol, MCContext &Ctx) {
- return MCSymbolRefExpr::Create(Symbol, VK_None, Ctx);
+ static const MCSymbolRefExpr *create(const MCSymbol *Symbol, MCContext &Ctx) {
+ return MCSymbolRefExpr::create(Symbol, VK_None, Ctx);
- static const MCSymbolRefExpr *Create(const MCSymbol *Symbol, VariantKind Kind,
+ static const MCSymbolRefExpr *create(const MCSymbol *Symbol, VariantKind Kind,
MCContext &Ctx);
- static const MCSymbolRefExpr *Create(StringRef Name, VariantKind Kind,
+ static const MCSymbolRefExpr *create(StringRef Name, VariantKind Kind,
MCContext &Ctx);
/// @}
@@ -369,19 +369,19 @@ public:
/// \name Construction
/// @{
- static const MCUnaryExpr *Create(Opcode Op, const MCExpr *Expr,
+ static const MCUnaryExpr *create(Opcode Op, const MCExpr *Expr,
MCContext &Ctx);
- static const MCUnaryExpr *CreateLNot(const MCExpr *Expr, MCContext &Ctx) {
- return Create(LNot, Expr, Ctx);
+ static const MCUnaryExpr *createLNot(const MCExpr *Expr, MCContext &Ctx) {
+ return create(LNot, Expr, Ctx);
- static const MCUnaryExpr *CreateMinus(const MCExpr *Expr, MCContext &Ctx) {
- return Create(Minus, Expr, Ctx);
+ static const MCUnaryExpr *createMinus(const MCExpr *Expr, MCContext &Ctx) {
+ return create(Minus, Expr, Ctx);
- static const MCUnaryExpr *CreateNot(const MCExpr *Expr, MCContext &Ctx) {
- return Create(Not, Expr, Ctx);
+ static const MCUnaryExpr *createNot(const MCExpr *Expr, MCContext &Ctx) {
+ return create(Not, Expr, Ctx);
- static const MCUnaryExpr *CreatePlus(const MCExpr *Expr, MCContext &Ctx) {
- return Create(Plus, Expr, Ctx);
+ static const MCUnaryExpr *createPlus(const MCExpr *Expr, MCContext &Ctx) {
+ return create(Plus, Expr, Ctx);
/// @}
@@ -441,83 +441,83 @@ public:
/// \name Construction
/// @{
- static const MCBinaryExpr *Create(Opcode Op, const MCExpr *LHS,
+ static const MCBinaryExpr *create(Opcode Op, const MCExpr *LHS,
const MCExpr *RHS, MCContext &Ctx);
- static const MCBinaryExpr *CreateAdd(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createAdd(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(Add, LHS, RHS, Ctx);
+ return create(Add, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateAnd(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createAnd(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(And, LHS, RHS, Ctx);
+ return create(And, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateDiv(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createDiv(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(Div, LHS, RHS, Ctx);
+ return create(Div, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateEQ(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createEQ(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(EQ, LHS, RHS, Ctx);
+ return create(EQ, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateGT(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createGT(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(GT, LHS, RHS, Ctx);
+ return create(GT, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateGTE(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createGTE(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(GTE, LHS, RHS, Ctx);
+ return create(GTE, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateLAnd(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createLAnd(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(LAnd, LHS, RHS, Ctx);
+ return create(LAnd, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateLOr(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createLOr(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(LOr, LHS, RHS, Ctx);
+ return create(LOr, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateLT(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createLT(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(LT, LHS, RHS, Ctx);
+ return create(LT, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateLTE(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createLTE(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(LTE, LHS, RHS, Ctx);
+ return create(LTE, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateMod(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createMod(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(Mod, LHS, RHS, Ctx);
+ return create(Mod, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateMul(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createMul(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(Mul, LHS, RHS, Ctx);
+ return create(Mul, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateNE(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createNE(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(NE, LHS, RHS, Ctx);
+ return create(NE, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateOr(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createOr(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(Or, LHS, RHS, Ctx);
+ return create(Or, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateShl(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createShl(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(Shl, LHS, RHS, Ctx);
+ return create(Shl, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateAShr(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createAShr(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(AShr, LHS, RHS, Ctx);
+ return create(AShr, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateLShr(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createLShr(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(LShr, LHS, RHS, Ctx);
+ return create(LShr, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateSub(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createSub(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(Sub, LHS, RHS, Ctx);
+ return create(Sub, LHS, RHS, Ctx);
- static const MCBinaryExpr *CreateXor(const MCExpr *LHS, const MCExpr *RHS,
+ static const MCBinaryExpr *createXor(const MCExpr *LHS, const MCExpr *RHS,
MCContext &Ctx) {
- return Create(Xor, LHS, RHS, Ctx);
+ return create(Xor, LHS, RHS, Ctx);
/// @}
@@ -551,13 +551,12 @@ protected:
MCTargetExpr() : MCExpr(Target) {}
virtual ~MCTargetExpr() {}
- virtual void PrintImpl(raw_ostream &OS) const = 0;
- virtual bool EvaluateAsRelocatableImpl(MCValue &Res,
+ virtual void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const = 0;
+ virtual bool evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const = 0;
virtual void visitUsedExpr(MCStreamer& Streamer) const = 0;
- virtual MCSection *FindAssociatedSection() const = 0;
+ virtual MCSection *findAssociatedSection() const = 0;
virtual void fixELFSymbolsInTLSFixups(MCAssembler &) const = 0;
diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h
index 2fc5091..4688b5f 100644
--- a/include/llvm/MC/MCInst.h
+++ b/include/llvm/MC/MCInst.h
@@ -32,12 +32,12 @@ class MCInst;
/// This is a simple discriminated union.
class MCOperand {
enum MachineOperandType : unsigned char {
- kInvalid, ///< Uninitialized.
- kRegister, ///< Register operand.
- kImmediate, ///< Immediate operand.
- kFPImmediate, ///< Floating-point immediate operand.
- kExpr, ///< Relocatable immediate operand.
- kInst ///< Sub-instruction operand.
+ kInvalid, ///< Uninitialized.
+ kRegister, ///< Register operand.
+ kImmediate, ///< Immediate operand.
+ kFPImmediate, ///< Floating-point immediate operand.
+ kExpr, ///< Relocatable immediate operand.
+ kInst ///< Sub-instruction operand.
MachineOperandType Kind;
@@ -48,8 +48,8 @@ class MCOperand {
const MCExpr *ExprVal;
const MCInst *InstVal;
MCOperand() : Kind(kInvalid), FPImmVal(0.0) {}
bool isValid() const { return Kind != kInvalid; }
@@ -151,6 +151,7 @@ class MCInst {
unsigned Opcode;
SMLoc Loc;
SmallVector<MCOperand, 8> Operands;
MCInst() : Opcode(0) {}
@@ -164,18 +165,16 @@ public:
MCOperand &getOperand(unsigned i) { return Operands[i]; }
unsigned getNumOperands() const { return Operands.size(); }
- void addOperand(const MCOperand &Op) {
- Operands.push_back(Op);
- }
- void clear() { Operands.clear(); }
- size_t size() const { return Operands.size(); }
+ void addOperand(const MCOperand &Op) { Operands.push_back(Op); }
typedef SmallVectorImpl<MCOperand>::iterator iterator;
typedef SmallVectorImpl<MCOperand>::const_iterator const_iterator;
+ void clear() { Operands.clear(); }
+ void erase(iterator I) { Operands.erase(I); }
+ size_t size() const { return Operands.size(); }
iterator begin() { return Operands.begin(); }
const_iterator begin() const { return Operands.begin(); }
- iterator end() { return Operands.end(); }
+ iterator end() { return Operands.end(); }
const_iterator end() const { return Operands.end(); }
iterator insert(iterator I, const MCOperand &Op) {
return Operands.insert(I, Op);
diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h
index 7e8563a..0eafd02 100644
--- a/include/llvm/MC/MCInstPrinter.h
+++ b/include/llvm/MC/MCInstPrinter.h
@@ -10,6 +10,7 @@
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Format.h"
@@ -22,11 +23,14 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
+/// Convert `Bytes' to a hex string and output to `OS'
+void dumpBytes(ArrayRef<uint8_t> Bytes, raw_ostream &OS);
namespace HexStyle {
- enum Style {
- C, ///< 0xff
- Asm ///< 0ffh
- };
+enum Style {
+ C, ///< 0xff
+ Asm ///< 0ffh
/// \brief This is an instance of a target assembly language printer that
@@ -52,12 +56,12 @@ protected:
/// Utility function for printing annotations.
void printAnnotation(raw_ostream &OS, StringRef Annot);
MCInstPrinter(const MCAsmInfo &mai, const MCInstrInfo &mii,
const MCRegisterInfo &mri)
- : CommentStream(nullptr), MAI(mai), MII(mii), MRI(mri),
- UseMarkup(0), PrintImmHex(0),
- PrintHexStyle(HexStyle::C) {}
+ : CommentStream(nullptr), MAI(mai), MII(mii), MRI(mri), UseMarkup(0),
+ PrintImmHex(0), PrintHexStyle(HexStyle::C) {}
virtual ~MCInstPrinter();
@@ -65,8 +69,8 @@ public:
void setCommentStream(raw_ostream &OS) { CommentStream = &OS; }
/// \brief Print the specified MCInst to the specified raw_ostream.
- virtual void printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot, const MCSubtargetInfo &STI) = 0;
+ virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+ const MCSubtargetInfo &STI) = 0;
/// \brief Return the name of the specified opcode enum (e.g. "MOV32ri") or
/// empty if we can't resolve it.
@@ -85,8 +89,8 @@ public:
bool getPrintImmHex() const { return PrintImmHex; }
void setPrintImmHex(bool Value) { PrintImmHex = Value; }
- HexStyle::Style getPrintHexStyleHex() const { return PrintHexStyle; }
- void setPrintImmHex(HexStyle::Style Value) { PrintHexStyle = Value; }
+ HexStyle::Style getPrintHexStyle() const { return PrintHexStyle; }
+ void setPrintHexStyle(HexStyle::Style Value) { PrintHexStyle = Value; }
/// Utility function to print immediates in decimal or hex.
format_object<int64_t> formatImm(int64_t Value) const {
diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h
index de3a195..3209a2c 100644
--- a/include/llvm/MC/MCInstrDesc.h
+++ b/include/llvm/MC/MCInstrDesc.h
@@ -125,7 +125,8 @@ enum Flag {
- InsertSubreg
+ InsertSubreg,
+ Convergent
@@ -138,10 +139,10 @@ class MCInstrDesc {
unsigned short Opcode; // The opcode number
unsigned short NumOperands; // Num of args (may be more if variable_ops)
- unsigned short NumDefs; // Num of args that are definitions
+ unsigned char NumDefs; // Num of args that are definitions
+ unsigned char Size; // Number of bytes in encoding.
unsigned short SchedClass; // enum identifying instr sched class
- unsigned short Size; // Number of bytes in encoding.
- unsigned Flags; // Flags identifying machine instr class
+ uint64_t Flags; // Flags identifying machine instr class
uint64_t TSFlags; // Target Specific Flag values
const uint16_t *ImplicitUses; // Registers implicitly read by this instr
const uint16_t *ImplicitDefs; // Registers implicitly defined by this instr
@@ -331,6 +332,13 @@ public:
/// override accordingly.
bool isInsertSubregLike() const { return Flags & (1 << MCID::InsertSubreg); }
+ /// \brief Return true if this instruction is convergent.
+ ///
+ /// Convergent instructions may only be moved to locations that are
+ /// control-equivalent to their original positions.
+ bool isConvergent() const { return Flags & (1 << MCID::Convergent); }
// Side Effect Analysis
diff --git a/include/llvm/MC/MCLabel.h b/include/llvm/MC/MCLabel.h
index de2d0af..a12473f 100644
--- a/include/llvm/MC/MCLabel.h
+++ b/include/llvm/MC/MCLabel.h
@@ -17,41 +17,41 @@
#include "llvm/Support/Compiler.h"
namespace llvm {
- class MCContext;
- class raw_ostream;
- /// \brief Instances of this class represent a label name in the MC file,
- /// and MCLabel are created and uniqued by the MCContext class. MCLabel
- /// should only be constructed for valid instances in the object file.
- class MCLabel {
- // \brief The instance number of this Directional Local Label.
- unsigned Instance;
- private: // MCContext creates and uniques these.
- friend class MCContext;
- MCLabel(unsigned instance)
- : Instance(instance) {}
- MCLabel(const MCLabel&) = delete;
- void operator=(const MCLabel&) = delete;
- public:
- /// \brief Get the current instance of this Directional Local Label.
- unsigned getInstance() const { return Instance; }
- /// \brief Increment the current instance of this Directional Local Label.
- unsigned incInstance() { return ++Instance; }
- /// \brief Print the value to the stream \p OS.
- void print(raw_ostream &OS) const;
- /// \brief Print the value to stderr.
- void dump() const;
- };
- inline raw_ostream &operator<<(raw_ostream &OS, const MCLabel &Label) {
- Label.print(OS);
- return OS;
- }
+class MCContext;
+class raw_ostream;
+/// \brief Instances of this class represent a label name in the MC file,
+/// and MCLabel are created and uniqued by the MCContext class. MCLabel
+/// should only be constructed for valid instances in the object file.
+class MCLabel {
+ // \brief The instance number of this Directional Local Label.
+ unsigned Instance;
+private: // MCContext creates and uniques these.
+ friend class MCContext;
+ MCLabel(unsigned instance) : Instance(instance) {}
+ MCLabel(const MCLabel &) = delete;
+ void operator=(const MCLabel &) = delete;
+ /// \brief Get the current instance of this Directional Local Label.
+ unsigned getInstance() const { return Instance; }
+ /// \brief Increment the current instance of this Directional Local Label.
+ unsigned incInstance() { return ++Instance; }
+ /// \brief Print the value to the stream \p OS.
+ void print(raw_ostream &OS) const;
+ /// \brief Print the value to stderr.
+ void dump() const;
+inline raw_ostream &operator<<(raw_ostream &OS, const MCLabel &Label) {
+ Label.print(OS);
+ return OS;
} // end namespace llvm
diff --git a/include/llvm/MC/MCLinkerOptimizationHint.h b/include/llvm/MC/MCLinkerOptimizationHint.h
index a186a14..4b6f7ec 100644
--- a/include/llvm/MC/MCLinkerOptimizationHint.h
+++ b/include/llvm/MC/MCLinkerOptimizationHint.h
@@ -106,7 +106,7 @@ class MCLOHDirective {
/// Emit this directive in \p OutStream using the information available
/// in the given \p ObjWriter and \p Layout to get the address of the
/// arguments within the object file.
- void Emit_impl(raw_ostream &OutStream, const MachObjectWriter &ObjWriter,
+ void emit_impl(raw_ostream &OutStream, const MachObjectWriter &ObjWriter,
const MCAsmLayout &Layout) const;
@@ -123,9 +123,9 @@ public:
/// Emit this directive as:
/// <kind, numArgs, addr1, ..., addrN>
- void Emit(MachObjectWriter &ObjWriter, const MCAsmLayout &Layout) const {
+ void emit(MachObjectWriter &ObjWriter, const MCAsmLayout &Layout) const {
raw_ostream &OutStream = ObjWriter.getStream();
- Emit_impl(OutStream, ObjWriter, Layout);
+ emit_impl(OutStream, ObjWriter, Layout);
/// Get the size in bytes of this directive if emitted in \p ObjWriter with
@@ -145,7 +145,7 @@ public:
raw_counting_ostream OutStream;
- Emit_impl(OutStream, ObjWriter, Layout);
+ emit_impl(OutStream, ObjWriter, Layout);
return OutStream.tell();
@@ -184,10 +184,10 @@ public:
/// Emit all Linker Optimization Hint in one big table.
- /// Each line of the table is emitted by LOHDirective::Emit.
- void Emit(MachObjectWriter &ObjWriter, const MCAsmLayout &Layout) const {
+ /// Each line of the table is emitted by LOHDirective::emit.
+ void emit(MachObjectWriter &ObjWriter, const MCAsmLayout &Layout) const {
for (const MCLOHDirective &D : Directives)
- D.Emit(ObjWriter, Layout);
+ D.emit(ObjWriter, Layout);
void reset() {
diff --git a/include/llvm/MC/MCMachOSymbolFlags.h b/include/llvm/MC/MCMachOSymbolFlags.h
deleted file mode 100644
index 71f01fa..0000000
--- a/include/llvm/MC/MCMachOSymbolFlags.h
+++ /dev/null
@@ -1,46 +0,0 @@
-//===- MCMachOSymbolFlags.h - MachO Symbol Flags ----------------*- C++ -*-===//
-// The LLVM Compiler Infrastructure
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// This file declares the SymbolFlags used for the MachO target.
-// These flags are mostly used in MCMachOStreamer.cpp but also needed in
-// MachObjectWriter.cpp to test for Weak Definitions of symbols to emit
-// the correct relocation information.
-namespace llvm {
- /// MachOSymbolFlags - We store the value for the 'desc' symbol field in the
- /// lowest 16 bits of the implementation defined flags.
- enum MachOSymbolFlags { // See <mach-o/nlist.h>.
- SF_DescFlagsMask = 0xFFFF,
- // Reference type flags.
- SF_ReferenceTypeMask = 0x0007,
- SF_ReferenceTypeUndefinedNonLazy = 0x0000,
- SF_ReferenceTypeUndefinedLazy = 0x0001,
- SF_ReferenceTypeDefined = 0x0002,
- SF_ReferenceTypePrivateDefined = 0x0003,
- SF_ReferenceTypePrivateUndefinedNonLazy = 0x0004,
- SF_ReferenceTypePrivateUndefinedLazy = 0x0005,
- // Other 'desc' flags.
- SF_ThumbFunc = 0x0008,
- SF_NoDeadStrip = 0x0020,
- SF_WeakReference = 0x0040,
- SF_WeakDefinition = 0x0080,
- SF_SymbolResolver = 0x0100
- };
-} // end namespace llvm
diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h
index 63c2a28..175d73e 100644
--- a/include/llvm/MC/MCMachObjectWriter.h
+++ b/include/llvm/MC/MCMachObjectWriter.h
@@ -27,15 +27,11 @@ class MCMachObjectTargetWriter {
const unsigned Is64Bit : 1;
const uint32_t CPUType;
const uint32_t CPUSubtype;
- // FIXME: Remove this, we should just always use it once we no longer care
- // about Darwin 'as' compatibility.
- const unsigned UseAggressiveSymbolFolding : 1;
unsigned LocalDifference_RIT;
MCMachObjectTargetWriter(bool Is64Bit_, uint32_t CPUType_,
- uint32_t CPUSubtype_,
- bool UseAggressiveSymbolFolding_ = false);
+ uint32_t CPUSubtype_);
void setLocalDifferenceRelocationType(unsigned Type) {
LocalDifference_RIT = Type;
@@ -47,7 +43,7 @@ public:
/// \name Lifetime Management
/// @{
- virtual void reset() {};
+ virtual void reset() {}
/// @}
@@ -55,7 +51,6 @@ public:
/// @{
bool is64Bit() const { return Is64Bit; }
- bool useAggressiveSymbolFolding() const { return UseAggressiveSymbolFolding; }
uint32_t getCPUType() const { return CPUType; }
uint32_t getCPUSubtype() const { return CPUSubtype; }
unsigned getLocalDifferenceRelocationType() const {
@@ -67,7 +62,7 @@ public:
/// \name API
/// @{
- virtual void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+ virtual void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
@@ -77,8 +72,7 @@ public:
class MachObjectWriter : public MCObjectWriter {
- /// MachSymbolData - Helper struct for containing some precomputed information
- /// on symbols.
+ /// Helper struct for containing some precomputed information on symbols.
struct MachSymbolData {
const MCSymbol *Symbol;
uint64_t StringIndex;
@@ -104,6 +98,8 @@ class MachObjectWriter : public MCObjectWriter {
llvm::DenseMap<const MCSection *, std::vector<RelAndSymbol>> Relocations;
llvm::DenseMap<const MCSection *, unsigned> IndirectSymBase;
+ SectionAddrMap SectionAddress;
/// @}
/// \name Symbol Table Data
/// @{
@@ -136,8 +132,6 @@ public:
bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
- SectionAddrMap SectionAddress;
SectionAddrMap &getSectionAddressMap() { return SectionAddress; }
uint64_t getSectionAddress(const MCSection *Sec) const {
@@ -165,41 +159,37 @@ public:
/// @}
- void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
+ void writeHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
bool SubsectionsViaSymbols);
- /// WriteSegmentLoadCommand - Write a segment load command.
+ /// Write a segment load command.
/// \param NumSections The number of sections in this segment.
/// \param SectionDataSize The total size of the sections.
- void WriteSegmentLoadCommand(unsigned NumSections,
- uint64_t VMSize,
+ void writeSegmentLoadCommand(unsigned NumSections, uint64_t VMSize,
uint64_t SectionDataStartOffset,
uint64_t SectionDataSize);
- void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ void writeSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCSection &Sec, uint64_t FileOffset,
uint64_t RelocationsStart, unsigned NumRelocations);
- void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
+ void writeSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
uint32_t StringTableOffset,
uint32_t StringTableSize);
- void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
- uint32_t NumLocalSymbols,
- uint32_t FirstExternalSymbol,
- uint32_t NumExternalSymbols,
- uint32_t FirstUndefinedSymbol,
- uint32_t NumUndefinedSymbols,
- uint32_t IndirectSymbolOffset,
- uint32_t NumIndirectSymbols);
+ void writeDysymtabLoadCommand(
+ uint32_t FirstLocalSymbol, uint32_t NumLocalSymbols,
+ uint32_t FirstExternalSymbol, uint32_t NumExternalSymbols,
+ uint32_t FirstUndefinedSymbol, uint32_t NumUndefinedSymbols,
+ uint32_t IndirectSymbolOffset, uint32_t NumIndirectSymbols);
- void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout);
+ void writeNlist(MachSymbolData &MSD, const MCAsmLayout &Layout);
- void WriteLinkeditLoadCommand(uint32_t Type, uint32_t DataOffset,
+ void writeLinkeditLoadCommand(uint32_t Type, uint32_t DataOffset,
uint32_t DataSize);
- void WriteLinkerOptionsLoadCommand(const std::vector<std::string> &Options);
+ void writeLinkerOptionsLoadCommand(const std::vector<std::string> &Options);
// FIXME: We really need to improve the relocation validation. Basically, we
// want to implement a separate computation which evaluates the relocation
@@ -226,29 +216,25 @@ public:
- void RecordScatteredRelocation(const MCAssembler &Asm,
+ void recordScatteredRelocation(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
- unsigned Log2Size,
- uint64_t &FixedValue);
+ unsigned Log2Size, uint64_t &FixedValue);
- void RecordTLVPRelocation(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue);
+ void recordTLVPRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
- void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+ void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override;
- void BindIndirectSymbols(MCAssembler &Asm);
+ void bindIndirectSymbols(MCAssembler &Asm);
- /// ComputeSymbolTable - Compute the symbol table data
- ///
- void ComputeSymbolTable(MCAssembler &Asm,
+ /// Compute the symbol table data.
+ void computeSymbolTable(MCAssembler &Asm,
std::vector<MachSymbolData> &LocalSymbolData,
std::vector<MachSymbolData> &ExternalSymbolData,
std::vector<MachSymbolData> &UndefinedSymbolData);
@@ -256,19 +242,18 @@ public:
void computeSectionAddresses(const MCAssembler &Asm,
const MCAsmLayout &Layout);
- void ExecutePostLayoutBinding(MCAssembler &Asm,
+ void executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override;
- bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
const MCSymbol &SymA,
const MCFragment &FB, bool InSet,
bool IsPCRel) const override;
- void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+ void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
-/// \brief Construct a new Mach-O writer instance.
+/// Construct a new Mach-O writer instance.
/// This routine takes ownership of the target writer subclass.
diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h
index f28b9c6..0515f1c 100644
--- a/include/llvm/MC/MCObjectFileInfo.h
+++ b/include/llvm/MC/MCObjectFileInfo.h
@@ -18,28 +18,25 @@
#include "llvm/Support/CodeGen.h"
namespace llvm {
- class MCContext;
- class MCSection;
- class StringRef;
+class MCContext;
+class MCSection;
+class StringRef;
class MCObjectFileInfo {
- /// CommDirectiveSupportsAlignment - True if .comm supports alignment. This
- /// is a hack for as long as we support 10.4 Tiger, whose assembler doesn't
- /// support alignment on comm.
+ /// True if .comm supports alignment. This is a hack for as long as we
+ /// support 10.4 Tiger, whose assembler doesn't support alignment on comm.
bool CommDirectiveSupportsAlignment;
- /// SupportsWeakEmptyEHFrame - True if target object file supports a
- /// weak_definition of constant 0 for an omitted EH frame.
+ /// True if target object file supports a weak_definition of constant 0 for an
+ /// omitted EH frame.
bool SupportsWeakOmittedEHFrame;
- /// SupportsCompactUnwindWithoutEHFrame - True if the target object file
- /// supports emitting a compact unwind section without an associated EH frame
- /// section.
+ /// True if the target object file supports emitting a compact unwind section
+ /// without an associated EH frame section.
bool SupportsCompactUnwindWithoutEHFrame;
- /// PersonalityEncoding, LSDAEncoding, TTypeEncoding - Some encoding values
- /// for EH.
+ /// Some encoding values for EH.
unsigned PersonalityEncoding;
unsigned LSDAEncoding;
unsigned FDECFIEncoding;
@@ -49,16 +46,13 @@ protected:
unsigned EHSectionType;
unsigned EHSectionFlags;
- /// CompactUnwindDwarfEHFrameOnly - Compact unwind encoding indicating that we
- /// should emit only an EH frame.
+ /// Compact unwind encoding indicating that we should emit only an EH frame.
unsigned CompactUnwindDwarfEHFrameOnly;
/// Section directive for standard text.
- ///
MCSection *TextSection;
/// Section directive for standard data.
- ///
MCSection *DataSection;
/// Section that is default initialized to zero.
@@ -101,7 +95,7 @@ protected:
// can be enabled by a compiler flag.
MCSection *DwarfPubNamesSection;
- // DWARF5 Experimental Debug Info Sections
+ /// DWARF5 Experimental Debug Info Sections
/// DwarfAccelNamesSection, DwarfAccelObjCSection,
/// DwarfAccelNamespaceSection, DwarfAccelTypesSection -
/// If we use the DWARF accelerated hash tables then we want to emit these
@@ -111,7 +105,7 @@ protected:
MCSection *DwarfAccelNamespaceSection;
MCSection *DwarfAccelTypesSection;
- /// These are used for the Fission separate debug information files.
+ // These are used for the Fission separate debug information files.
MCSection *DwarfInfoDWOSection;
MCSection *DwarfTypesDWOSection;
MCSection *DwarfAbbrevDWOSection;
@@ -121,32 +115,36 @@ protected:
MCSection *DwarfStrOffDWOSection;
MCSection *DwarfAddrSection;
- /// Sections for newer gnu pubnames and pubtypes.
+ /// Section for newer gnu pubnames.
MCSection *DwarfGnuPubNamesSection;
+ /// Section for newer gnu pubtypes.
MCSection *DwarfGnuPubTypesSection;
MCSection *COFFDebugSymbolsSection;
- // Extra TLS Variable Data section. If the target needs to put additional
- // information for a TLS variable, it'll go here.
+ /// Extra TLS Variable Data section.
+ ///
+ /// If the target needs to put additional information for a TLS variable,
+ /// it'll go here.
MCSection *TLSExtraDataSection;
/// Section directive for Thread Local data. ELF, MachO and COFF.
MCSection *TLSDataSection; // Defaults to ".tdata".
- /// Section directive for Thread Local uninitialized data. Null if this target
- /// doesn't support a BSS section. ELF and MachO only.
+ /// Section directive for Thread Local uninitialized data.
+ ///
+ /// Null if this target doesn't support a BSS section. ELF and MachO only.
MCSection *TLSBSSSection; // Defaults to ".tbss".
/// StackMap section.
MCSection *StackMapSection;
- /// EH frame section. It is initialized on demand so it can be overwritten
- /// (with uniquing).
+ /// EH frame section.
+ ///
+ /// It is initialized on demand so it can be overwritten (with uniquing).
MCSection *EHFrameSection;
- /// ELF specific sections.
- ///
+ // ELF specific sections.
MCSection *DataRelSection;
const MCSection *DataRelLocalSection;
MCSection *DataRelROSection;
@@ -155,17 +153,16 @@ protected:
MCSection *MergeableConst8Section;
MCSection *MergeableConst16Section;
- /// MachO specific sections.
- ///
+ // MachO specific sections.
- /// Section for thread local structure information. Contains the source code
- /// name of the variable, visibility and a pointer to the initial value
- /// (.tdata or .tbss).
+ /// Section for thread local structure information.
+ ///
+ /// Contains the source code name of the variable, visibility and a pointer to
+ /// the initial value (.tdata or .tbss).
MCSection *TLSTLVSection; // Defaults to ".tlv".
- /// TLSThreadInitSection - Section for thread local data initialization
- /// functions.
- const MCSection *TLSThreadInitSection; // Defaults to ".thread_init_func".
+ /// Section for thread local data initialization functions.
+ const MCSection *TLSThreadInitSection; // Defaults to ".thread_init_func".
MCSection *CStringSection;
MCSection *UStringSection;
@@ -182,10 +179,10 @@ protected:
MCSection *NonLazySymbolPointerSection;
/// COFF specific sections.
- ///
MCSection *DrectveSection;
MCSection *PDataSection;
MCSection *XDataSection;
+ MCSection *SXDataSection;
void InitMCObjectFileInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM,
@@ -266,8 +263,7 @@ public:
MCSection *getStackMapSection() const { return StackMapSection; }
- /// ELF specific sections.
- ///
+ // ELF specific sections.
MCSection *getDataRelSection() const { return DataRelSection; }
const MCSection *getDataRelLocalSection() const {
return DataRelLocalSection;
@@ -284,8 +280,7 @@ public:
return MergeableConst16Section;
- /// MachO specific sections.
- ///
+ // MachO specific sections.
const MCSection *getTLSTLVSection() const { return TLSTLVSection; }
const MCSection *getTLSThreadInitSection() const {
return TLSThreadInitSection;
@@ -316,11 +311,11 @@ public:
return NonLazySymbolPointerSection;
- /// COFF specific sections.
- ///
+ // COFF specific sections.
MCSection *getDrectveSection() const { return DrectveSection; }
MCSection *getPDataSection() const { return PDataSection; }
MCSection *getXDataSection() const { return XDataSection; }
+ MCSection *getSXDataSection() const { return SXDataSection; }
MCSection *getEHFrameSection() {
if (!EHFrameSection)
@@ -329,13 +324,9 @@ public:
enum Environment { IsMachO, IsELF, IsCOFF };
- Environment getObjectFileType() const {
- return Env;
- }
+ Environment getObjectFileType() const { return Env; }
- Reloc::Model getRelocM() const {
- return RelocM;
- }
+ Reloc::Model getRelocM() const { return RelocM; }
Environment Env;
@@ -344,12 +335,11 @@ private:
MCContext *Ctx;
Triple TT;
- void InitMachOMCObjectFileInfo(Triple T);
- void InitELFMCObjectFileInfo(Triple T);
- void InitCOFFMCObjectFileInfo(Triple T);
+ void initMachOMCObjectFileInfo(Triple T);
+ void initELFMCObjectFileInfo(Triple T);
+ void initCOFFMCObjectFileInfo(Triple T);
- /// InitEHFrameSection - Initialize EHFrameSection on demand.
- ///
+ /// Initialize EHFrameSection on demand.
void InitEHFrameSection();
diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h
index e75bc86..462b3b4 100644
--- a/include/llvm/MC/MCObjectStreamer.h
+++ b/include/llvm/MC/MCObjectStreamer.h
@@ -35,11 +35,10 @@ class raw_pwrite_stream;
/// implementation.
class MCObjectStreamer : public MCStreamer {
MCAssembler *Assembler;
- MCSection *CurSectionData;
MCSection::iterator CurInsertionPoint;
bool EmitEHFrame;
bool EmitDebugFrame;
- SmallVector<MCSymbolData *, 2> PendingLabels;
+ SmallVector<MCSymbol *, 2> PendingLabels;
virtual void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo&) = 0;
void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
@@ -57,21 +56,17 @@ public:
/// Object streamers require the integrated assembler.
bool isIntegratedAssemblerRequired() const override { return true; }
- MCSymbolData &getOrCreateSymbolData(const MCSymbol *Symbol) {
- return getAssembler().getOrCreateSymbolData(*Symbol);
- }
void EmitFrames(MCAsmBackend *MAB);
void EmitCFISections(bool EH, bool Debug) override;
- MCSection *getCurrentSectionData() const { return CurSectionData; }
MCFragment *getCurrentFragment() const;
void insert(MCFragment *F) {
- CurSectionData->getFragmentList().insert(CurInsertionPoint, F);
- F->setParent(CurSectionData);
+ MCSection *CurSection = getCurrentSectionOnly();
+ CurSection->getFragmentList().insert(CurInsertionPoint, F);
+ F->setParent(CurSection);
/// Get a data fragment to write into, creating a new one if the current
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
index 999d294..2211673 100644
--- a/include/llvm/MC/MCObjectWriter.h
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -22,18 +22,17 @@ class MCAsmLayout;
class MCAssembler;
class MCFixup;
class MCFragment;
-class MCSymbolData;
class MCSymbolRefExpr;
class MCValue;
-/// MCObjectWriter - Defines the object file and target independent interfaces
-/// used by the assembler backend to write native file format object files.
+/// Defines the object file and target independent interfaces used by the
+/// assembler backend to write native file format object files.
/// The object writer contains a few callbacks used by the assembler to allow
/// the object writer to modify the assembler data structures at appropriate
/// points. Once assembly is complete, the object writer is given the
/// MCAssembler instance, which contains all the symbol and section data which
-/// should be emitted as part of WriteObject().
+/// should be emitted as part of writeObject().
/// The object writer also contains a number of helper methods for writing
/// binary data to the output stream.
@@ -54,7 +53,7 @@ public:
virtual ~MCObjectWriter();
/// lifetime management
- virtual void reset() { }
+ virtual void reset() {}
bool isLittleEndian() const { return IsLittleEndian; }
@@ -63,109 +62,106 @@ public:
/// \name High-Level API
/// @{
- /// \brief Perform any late binding of symbols (for example, to assign symbol
+ /// Perform any late binding of symbols (for example, to assign symbol
/// indices for use when generating relocations).
/// This routine is called by the assembler after layout and relaxation is
/// complete.
- virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+ virtual void executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) = 0;
- /// \brief Record a relocation entry.
+ /// Record a relocation entry.
/// This routine is called by the assembler after layout and relaxation, and
/// post layout binding. The implementation is responsible for storing
/// information about the relocation so that it can be emitted during
- /// WriteObject().
- virtual void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+ /// writeObject().
+ virtual void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
bool &IsPCRel, uint64_t &FixedValue) = 0;
- /// \brief Check whether the difference (A - B) between two symbol
- /// references is fully resolved.
+ /// Check whether the difference (A - B) between two symbol references is
+ /// fully resolved.
/// Clients are not required to answer precisely and may conservatively return
/// false, even when a difference is fully resolved.
- bool IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
+ bool isSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
const MCSymbolRefExpr *A,
const MCSymbolRefExpr *B,
bool InSet) const;
- virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ virtual bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
const MCSymbol &SymA,
const MCFragment &FB,
bool InSet,
bool IsPCRel) const;
- /// \brief True if this symbol (which is a variable) is weak. This is not
+ /// True if this symbol (which is a variable) is weak. This is not
/// just STB_WEAK, but more generally whether or not we can evaluate
/// past it.
virtual bool isWeak(const MCSymbol &Sym) const;
- /// \brief Write the object file.
+ /// Write the object file.
/// This routine is called by the assembler after layout and relaxation is
/// complete, fixups have been evaluated and applied, and relocations
/// generated.
- virtual void WriteObject(MCAssembler &Asm,
- const MCAsmLayout &Layout) = 0;
+ virtual void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) = 0;
/// @}
/// \name Binary Output
/// @{
- void Write8(uint8_t Value) {
- OS << char(Value);
- }
+ void write8(uint8_t Value) { OS << char(Value); }
- void WriteLE16(uint16_t Value) {
+ void writeLE16(uint16_t Value) {
- void WriteLE32(uint32_t Value) {
+ void writeLE32(uint32_t Value) {
- void WriteLE64(uint64_t Value) {
+ void writeLE64(uint64_t Value) {
- void WriteBE16(uint16_t Value) {
+ void writeBE16(uint16_t Value) {
- void WriteBE32(uint32_t Value) {
+ void writeBE32(uint32_t Value) {
- void WriteBE64(uint64_t Value) {
+ void writeBE64(uint64_t Value) {
- void Write16(uint16_t Value) {
+ void write16(uint16_t Value) {
if (IsLittleEndian)
- WriteLE16(Value);
+ writeLE16(Value);
- WriteBE16(Value);
+ writeBE16(Value);
- void Write32(uint32_t Value) {
+ void write32(uint32_t Value) {
if (IsLittleEndian)
- WriteLE32(Value);
+ writeLE32(Value);
- WriteBE32(Value);
+ writeBE32(Value);
- void Write64(uint64_t Value) {
+ void write64(uint64_t Value) {
if (IsLittleEndian)
- WriteLE64(Value);
+ writeLE64(Value);
- WriteBE64(Value);
+ writeBE64(Value);
void WriteZeros(unsigned N) {
- const char Zeros[16] = { 0 };
+ const char Zeros[16] = {0};
for (unsigned i = 0, e = N / 16; i != e; ++i)
OS << StringRef(Zeros, 16);
@@ -173,22 +169,23 @@ public:
OS << StringRef(Zeros, N % 16);
- void WriteBytes(const SmallVectorImpl<char> &ByteVec, unsigned ZeroFillSize = 0) {
- WriteBytes(StringRef(, ByteVec.size()), ZeroFillSize);
+ void writeBytes(const SmallVectorImpl<char> &ByteVec,
+ unsigned ZeroFillSize = 0) {
+ writeBytes(StringRef(, ByteVec.size()), ZeroFillSize);
- void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) {
+ void writeBytes(StringRef Str, unsigned ZeroFillSize = 0) {
// TODO: this version may need to go away once all fragment contents are
// converted to SmallVector<char, N>
- assert((ZeroFillSize == 0 || Str.size () <= ZeroFillSize) &&
- "data size greater than fill size, unexpected large write will occur");
+ assert(
+ (ZeroFillSize == 0 || Str.size() <= ZeroFillSize) &&
+ "data size greater than fill size, unexpected large write will occur");
OS << Str;
if (ZeroFillSize)
WriteZeros(ZeroFillSize - Str.size());
/// @}
} // End llvm namespace
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index 96a4ef1..5f6e8ec 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -73,11 +73,13 @@ private:
/// \brief We've seen a bundle_lock directive but not its first instruction
/// yet.
- bool BundleGroupBeforeFirstInst = false;
+ unsigned BundleGroupBeforeFirstInst : 1;
/// Whether this section has had instructions emitted into it.
unsigned HasInstructions : 1;
+ unsigned IsRegistered : 1;
FragmentListType Fragments;
/// Mapping from subsection number to insertion point for subsection numbers
@@ -130,6 +132,9 @@ public:
bool hasInstructions() const { return HasInstructions; }
void setHasInstructions(bool Value) { HasInstructions = Value; }
+ bool isRegistered() const { return IsRegistered; }
+ void setIsRegistered(bool Value) { IsRegistered = Value; }
MCSection::FragmentListType &getFragmentList() { return Fragments; }
const MCSection::FragmentListType &getFragmentList() const {
return const_cast<MCSection *>(this)->getFragmentList();
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index 9efe102..f673037 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -16,7 +16,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/raw_ostream.h"
@@ -46,7 +46,7 @@ class MCSectionELF : public MCSection {
/// section does not contain fixed-sized entries 'EntrySize' will be 0.
unsigned EntrySize;
- const MCSymbol *Group;
+ const MCSymbolELF *Group;
/// Depending on the type of the section this is sh_link or sh_info.
const MCSectionELF *Associated;
@@ -54,11 +54,14 @@ class MCSectionELF : public MCSection {
friend class MCContext;
MCSectionELF(StringRef Section, unsigned type, unsigned flags, SectionKind K,
- unsigned entrySize, const MCSymbol *group, unsigned UniqueID,
+ unsigned entrySize, const MCSymbolELF *group, unsigned UniqueID,
MCSymbol *Begin, const MCSectionELF *Associated)
: MCSection(SV_ELF, K, Begin), SectionName(Section), Type(type),
Flags(flags), UniqueID(UniqueID), EntrySize(entrySize), Group(group),
- Associated(Associated) {}
+ Associated(Associated) {
+ if (Group)
+ Group->setIsSignature();
+ }
~MCSectionELF() override;
void setSectionName(StringRef Name) { SectionName = Name; }
@@ -73,7 +76,7 @@ public:
unsigned getType() const { return Type; }
unsigned getFlags() const { return Flags; }
unsigned getEntrySize() const { return EntrySize; }
- const MCSymbol *getGroup() const { return Group; }
+ const MCSymbolELF *getGroup() const { return Group; }
void PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS,
const MCExpr *Subsection) const override;
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 957913e..628fb76 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -34,6 +34,7 @@ class MCInstPrinter;
class MCSection;
class MCStreamer;
class MCSymbol;
+class MCSymbolELF;
class MCSymbolRefExpr;
class MCSubtargetInfo;
class StringRef;
@@ -272,6 +273,7 @@ public:
return SectionStack.back().first;
return MCSectionSubPair();
+ MCSection *getCurrentSectionOnly() const { return getCurrentSection().first; }
/// \brief Return the previous section that the streamer is emitting code to.
MCSectionSubPair getPreviousSection() const {
@@ -305,11 +307,15 @@ public:
bool PopSection() {
if (SectionStack.size() <= 1)
return false;
- MCSectionSubPair oldSection = SectionStack.pop_back_val().first;
- MCSectionSubPair curSection = SectionStack.back().first;
- if (oldSection != curSection)
- ChangeSection(curSection.first, curSection.second);
+ auto I = SectionStack.end();
+ --I;
+ MCSectionSubPair OldSection = I->first;
+ --I;
+ MCSectionSubPair NewSection = I->first;
+ if (OldSection != NewSection)
+ ChangeSection(NewSection.first, NewSection.second);
+ SectionStack.pop_back();
return true;
@@ -433,6 +439,8 @@ public:
/// \brief Marks the end of the symbol definition.
virtual void EndCOFFSymbolDef();
+ virtual void EmitCOFFSafeSEH(MCSymbol const *Symbol);
/// \brief Emits a COFF section index.
/// \param Symbol - Symbol the section number relocation should point to.
@@ -447,7 +455,7 @@ public:
/// This corresponds to an assembler statement such as:
/// .size symbol, expression
- virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value);
/// \brief Emit a Linker Optimization Hint (LOH) directive.
/// \param Args - Arguments of the LOH.
diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h
index 1778a6d..ee5d563 100644
--- a/include/llvm/MC/MCSubtargetInfo.h
+++ b/include/llvm/MC/MCSubtargetInfo.h
@@ -73,7 +73,9 @@ public:
/// setFeatureBits - Set the feature bits.
- void setFeatureBits(FeatureBitset& FeatureBits_) { FeatureBits = FeatureBits_; }
+ void setFeatureBits(const FeatureBitset &FeatureBits_) {
+ FeatureBits = FeatureBits_;
+ }
/// InitMCProcessorInfo - Set or change the CPU (optionally supplemented with
/// feature string). Recompute feature bits and scheduling model.
@@ -94,6 +96,10 @@ public:
/// feature bits. This version will also change all implied bits.
FeatureBitset ToggleFeature(StringRef FS);
+ /// Apply a feature flag and return the re-computed feature bits, including
+ /// all feature bits implied by the flag.
+ FeatureBitset ApplyFeatureFlag(StringRef FS);
/// getSchedModelForCPU - Get the machine model of a CPU.
MCSchedModel getSchedModelForCPU(StringRef CPU) const;
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index cf99c91..078f3d7 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -14,12 +14,14 @@
-#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
+class MCAsmInfo;
class MCExpr;
class MCSymbol;
class MCFragment;
@@ -27,109 +29,6 @@ class MCSection;
class MCContext;
class raw_ostream;
-// TODO: Merge completely with MCSymbol.
-class MCSymbolData {
- /// Fragment - The fragment this symbol's value is relative to, if any. Also
- /// stores if this symbol is visible outside this translation unit (bit 0) or
- /// if it is private extern (bit 1).
- PointerIntPair<MCFragment *, 2> Fragment;
- union {
- /// Offset - The offset to apply to the fragment address to form this
- /// symbol's value.
- uint64_t Offset;
- /// CommonSize - The size of the symbol, if it is 'common'.
- uint64_t CommonSize;
- };
- /// SymbolSize - An expression describing how to calculate the size of
- /// a symbol. If a symbol has no size this field will be NULL.
- const MCExpr *SymbolSize = nullptr;
- /// CommonAlign - The alignment of the symbol, if it is 'common', or -1.
- //
- // FIXME: Pack this in with other fields?
- unsigned CommonAlign = -1U;
- /// Flags - The Flags field is used by object file implementations to store
- /// additional per symbol information which is not easily classified.
- uint32_t Flags = 0;
- MCSymbolData() { Offset = 0; }
- MCFragment *getFragment() const { return Fragment.getPointer(); }
- void setFragment(MCFragment *Value) { Fragment.setPointer(Value); }
- uint64_t getOffset() const {
- assert(!isCommon());
- return Offset;
- }
- void setOffset(uint64_t Value) {
- assert(!isCommon());
- Offset = Value;
- }
- /// @}
- /// \name Symbol Attributes
- /// @{
- bool isExternal() const { return Fragment.getInt() & 1; }
- void setExternal(bool Value) {
- Fragment.setInt((Fragment.getInt() & ~1) | unsigned(Value));
- }
- bool isPrivateExtern() const { return Fragment.getInt() & 2; }
- void setPrivateExtern(bool Value) {
- Fragment.setInt((Fragment.getInt() & ~2) | (unsigned(Value) << 1));
- }
- /// isCommon - Is this a 'common' symbol.
- bool isCommon() const { return CommonAlign != -1U; }
- /// setCommon - Mark this symbol as being 'common'.
- ///
- /// \param Size - The size of the symbol.
- /// \param Align - The alignment of the symbol.
- void setCommon(uint64_t Size, unsigned Align) {
- assert(getOffset() == 0);
- CommonSize = Size;
- CommonAlign = Align;
- }
- /// getCommonSize - Return the size of a 'common' symbol.
- uint64_t getCommonSize() const {
- assert(isCommon() && "Not a 'common' symbol!");
- return CommonSize;
- }
- void setSize(const MCExpr *SS) { SymbolSize = SS; }
- const MCExpr *getSize() const { return SymbolSize; }
- /// getCommonAlignment - Return the alignment of a 'common' symbol.
- unsigned getCommonAlignment() const {
- assert(isCommon() && "Not a 'common' symbol!");
- return CommonAlign;
- }
- /// getFlags - Get the (implementation defined) symbol flags.
- uint32_t getFlags() const { return Flags; }
- /// setFlags - Set the (implementation defined) symbol flags.
- void setFlags(uint32_t Value) { Flags = Value; }
- /// modifyFlags - Modify the flags via a mask
- void modifyFlags(uint32_t Value, uint32_t Mask) {
- Flags = (Flags & ~Mask) | Value;
- }
- /// @}
- void dump() const;
/// MCSymbol - Instances of this class represent a symbol name in the MC file,
/// and MCSymbols are created and uniqued by the MCContext class. MCSymbols
/// should only be constructed with valid names for the object file.
@@ -138,6 +37,16 @@ public:
/// Section member is set to indicate what section it lives in. Otherwise, if
/// it is a reference to an external entity, it has a null section.
class MCSymbol {
+ /// The kind of the symbol. If it is any value other than unset then this
+ /// class is actually one of the appropriate subclasses of MCSymbol.
+ enum SymbolKind {
+ SymbolKindUnset,
+ SymbolKindCOFF,
+ SymbolKindELF,
+ SymbolKindMachO,
+ };
// Special sentinal value for the absolute pseudo section.
// FIXME: Use a PointerInt wrapper for this?
@@ -147,10 +56,18 @@ class MCSymbol {
/// held by the StringMap that lives in MCContext.
const StringMapEntry<bool> *Name;
- /// The section the symbol is defined in. This is null for undefined symbols,
- /// and the special AbsolutePseudoSection value for absolute symbols. If this
- /// is a variable symbol, this caches the variable value's section.
- mutable MCSection *Section;
+ /// If a symbol has a Fragment, the section is implied, so we only need
+ /// one pointer.
+ /// FIXME: We might be able to simplify this by having the asm streamer create
+ /// dummy fragments.
+ /// If this is a section, then it gives the section the symbol is defined
+ /// in. This is null for undefined symbols, and the special
+ /// AbsolutePseudoSection value for absolute symbols. If this is a variable
+ /// symbol, this caches the variable value's section.
+ ///
+ /// If this is a fragment, then it gives the fragment this symbol's value is
+ /// relative to, if any.
+ mutable PointerUnion<MCSection *, MCFragment *> SectionOrFragment;
/// Value - If non-null, the value for a variable symbol.
const MCExpr *Value;
@@ -166,46 +83,68 @@ class MCSymbol {
/// IsUsed - True if this symbol has been used.
mutable unsigned IsUsed : 1;
- mutable bool HasData : 1;
+ mutable bool IsRegistered : 1;
+ /// This symbol is visible outside this translation unit.
+ mutable unsigned IsExternal : 1;
+ /// This symbol is private extern.
+ mutable unsigned IsPrivateExtern : 1;
+ /// LLVM RTTI discriminator. This is actually a SymbolKind enumerator, but is
+ /// unsigned to avoid sign extension and achieve better bitpacking with MSVC.
+ unsigned Kind : 2;
/// Index field, for use by the object file implementation.
- mutable uint64_t Index : 60;
+ mutable uint32_t Index = 0;
+ union {
+ /// The offset to apply to the fragment address to form this symbol's value.
+ uint64_t Offset;
+ /// The size of the symbol, if it is 'common'.
+ uint64_t CommonSize;
+ };
- mutable MCSymbolData Data;
+ /// The alignment of the symbol, if it is 'common', or -1.
+ //
+ // FIXME: Pack this in with other fields?
+ unsigned CommonAlign = -1U;
+ /// The Flags field is used by object file implementations to store
+ /// additional per symbol information which is not easily classified.
+ mutable uint32_t Flags = 0;
-private: // MCContext creates and uniques these.
+protected: // MCContext creates and uniques these.
friend class MCExpr;
friend class MCContext;
- MCSymbol(const StringMapEntry<bool> *Name, bool isTemporary)
- : Name(Name), Section(nullptr), Value(nullptr), IsTemporary(isTemporary),
- IsRedefinable(false), IsUsed(false), HasData(false), Index(0) {}
+ MCSymbol(SymbolKind Kind, const StringMapEntry<bool> *Name, bool isTemporary)
+ : Name(Name), Value(nullptr), IsTemporary(isTemporary),
+ IsRedefinable(false), IsUsed(false), IsRegistered(false),
+ IsExternal(false), IsPrivateExtern(false),
+ Kind(Kind) {
+ Offset = 0;
+ }
MCSymbol(const MCSymbol &) = delete;
void operator=(const MCSymbol &) = delete;
MCSection *getSectionPtr() const {
+ if (MCFragment *F = getFragment())
+ return F->getParent();
+ assert(!SectionOrFragment.is<MCFragment *>() && "Section or null expected");
+ MCSection *Section = SectionOrFragment.dyn_cast<MCSection *>();
if (Section || !Value)
return Section;
- return Section = Value->FindAssociatedSection();
+ return Section = Value->findAssociatedSection();
/// getName - Get the symbol name.
StringRef getName() const { return Name ? Name->first() : ""; }
- bool hasData() const { return HasData; }
- /// Get associated symbol data.
- MCSymbolData &getData() const {
- assert(HasData && "Missing symbol data!");
- return Data;
- }
- /// Initialize symbol data.
- ///
- /// Nothing really to do here, but this is enables an assertion that \a
- /// MCAssembler::getOrCreateSymbolData() has actually been called before
- /// anyone calls \a getData().
- void initializeData() const { HasData = true; }
+ bool isRegistered() const { return IsRegistered; }
+ void setIsRegistered(bool Value) const { IsRegistered = Value; }
/// \name Accessors
/// @{
@@ -225,7 +164,7 @@ public:
void redefineIfPossible() {
if (IsRedefinable) {
Value = nullptr;
- Section = nullptr;
+ SectionOrFragment = nullptr;
IsRedefinable = false;
@@ -258,11 +197,20 @@ public:
/// Mark the symbol as defined in the section \p S.
void setSection(MCSection &S) {
assert(!isVariable() && "Cannot set section of variable");
- Section = &S;
+ assert(!SectionOrFragment.is<MCFragment *>() && "Section or null expected");
+ SectionOrFragment = &S;
+ }
+ /// Mark the symbol as undefined.
+ void setUndefined() {
+ SectionOrFragment = nullptr;
- /// setUndefined - Mark the symbol as undefined.
- void setUndefined() { Section = nullptr; }
+ bool isELF() const { return Kind == SymbolKindELF; }
+ bool isCOFF() const { return Kind == SymbolKindCOFF; }
+ bool isMachO() const { return Kind == SymbolKindMachO; }
/// @}
/// \name Variable Symbols
@@ -283,27 +231,98 @@ public:
/// @}
/// Get the (implementation defined) index.
- uint64_t getIndex() const {
- assert(HasData && "Uninitialized symbol data");
+ uint32_t getIndex() const {
return Index;
/// Set the (implementation defined) index.
- void setIndex(uint64_t Value) const {
- assert(HasData && "Uninitialized symbol data");
- assert(!(Value >> 60) && "Not enough bits for value");
+ void setIndex(uint32_t Value) const {
Index = Value;
+ uint64_t getOffset() const {
+ assert(!isCommon());
+ return Offset;
+ }
+ void setOffset(uint64_t Value) {
+ assert(!isCommon());
+ Offset = Value;
+ }
+ /// Return the size of a 'common' symbol.
+ uint64_t getCommonSize() const {
+ assert(isCommon() && "Not a 'common' symbol!");
+ return CommonSize;
+ }
+ /// Mark this symbol as being 'common'.
+ ///
+ /// \param Size - The size of the symbol.
+ /// \param Align - The alignment of the symbol.
+ void setCommon(uint64_t Size, unsigned Align) {
+ assert(getOffset() == 0);
+ CommonSize = Size;
+ CommonAlign = Align;
+ }
+ /// Return the alignment of a 'common' symbol.
+ unsigned getCommonAlignment() const {
+ assert(isCommon() && "Not a 'common' symbol!");
+ return CommonAlign;
+ }
+ /// Declare this symbol as being 'common'.
+ ///
+ /// \param Size - The size of the symbol.
+ /// \param Align - The alignment of the symbol.
+ /// \return True if symbol was already declared as a different type
+ bool declareCommon(uint64_t Size, unsigned Align) {
+ assert(isCommon() || getOffset() == 0);
+ if(isCommon()) {
+ if(CommonSize != Size || CommonAlign != Align)
+ return true;
+ } else
+ setCommon(Size, Align);
+ return false;
+ }
+ /// Is this a 'common' symbol.
+ bool isCommon() const { return CommonAlign != -1U; }
+ MCFragment *getFragment() const {
+ return SectionOrFragment.dyn_cast<MCFragment *>();
+ }
+ void setFragment(MCFragment *Value) const {
+ SectionOrFragment = Value;
+ }
+ bool isExternal() const { return IsExternal; }
+ void setExternal(bool Value) const { IsExternal = Value; }
+ bool isPrivateExtern() const { return IsPrivateExtern; }
+ void setPrivateExtern(bool Value) { IsPrivateExtern = Value; }
/// print - Print the value to the stream \p OS.
- void print(raw_ostream &OS) const;
+ void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
/// dump - Print the value to stderr.
void dump() const;
+ /// Get the (implementation defined) symbol flags.
+ uint32_t getFlags() const { return Flags; }
+ /// Set the (implementation defined) symbol flags.
+ void setFlags(uint32_t Value) const { Flags = Value; }
+ /// Modify the flags via a mask
+ void modifyFlags(uint32_t Value, uint32_t Mask) const {
+ Flags = (Flags & ~Mask) | Value;
+ }
inline raw_ostream &operator<<(raw_ostream &OS, const MCSymbol &Sym) {
- Sym.print(OS);
+ Sym.print(OS, nullptr);
return OS;
} // end namespace llvm
diff --git a/include/llvm/MC/MCSymbolCOFF.h b/include/llvm/MC/MCSymbolCOFF.h
new file mode 100644
index 0000000..2172c67
--- /dev/null
+++ b/include/llvm/MC/MCSymbolCOFF.h
@@ -0,0 +1,64 @@
+//===- MCSymbolCOFF.h - ----------------------------------------*- C++ -*-===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "llvm/MC/MCSymbol.h"
+namespace llvm {
+class MCSymbolCOFF : public MCSymbol {
+ /// This corresponds to the e_type field of the COFF symbol.
+ mutable uint16_t Type;
+ enum SymbolFlags : uint16_t {
+ SF_ClassMask = 0x00FF,
+ SF_ClassShift = 0,
+ SF_WeakExternal = 0x0100,
+ SF_SafeSEH = 0x0200,
+ };
+ MCSymbolCOFF(const StringMapEntry<bool> *Name, bool isTemporary)
+ : MCSymbol(SymbolKindCOFF, Name, isTemporary), Type(0) {}
+ uint16_t getType() const {
+ return Type;
+ }
+ void setType(uint16_t Ty) const {
+ Type = Ty;
+ }
+ uint16_t getClass() const {
+ return (getFlags() & SF_ClassMask) >> SF_ClassShift;
+ }
+ void setClass(uint16_t StorageClass) const {
+ modifyFlags(StorageClass << SF_ClassShift, SF_ClassMask);
+ }
+ bool isWeakExternal() const {
+ return getFlags() & SF_WeakExternal;
+ }
+ void setIsWeakExternal() const {
+ modifyFlags(SF_WeakExternal, SF_WeakExternal);
+ }
+ bool isSafeSEH() const {
+ return getFlags() & SF_SafeSEH;
+ }
+ void setIsSafeSEH() const {
+ modifyFlags(SF_SafeSEH, SF_SafeSEH);
+ }
+ static bool classof(const MCSymbol *S) { return S->isCOFF(); }
diff --git a/include/llvm/MC/MCSymbolELF.h b/include/llvm/MC/MCSymbolELF.h
new file mode 100644
index 0000000..0cc1115
--- /dev/null
+++ b/include/llvm/MC/MCSymbolELF.h
@@ -0,0 +1,57 @@
+//===- MCSymbolELF.h - -----------------------------------------*- C++ -*-===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "llvm/MC/MCSymbol.h"
+namespace llvm {
+class MCSymbolELF : public MCSymbol {
+ /// An expression describing how to calculate the size of a symbol. If a
+ /// symbol has no size this field will be NULL.
+ const MCExpr *SymbolSize = nullptr;
+ MCSymbolELF(const StringMapEntry<bool> *Name, bool isTemporary)
+ : MCSymbol(SymbolKindELF, Name, isTemporary) {}
+ void setSize(const MCExpr *SS) { SymbolSize = SS; }
+ const MCExpr *getSize() const { return SymbolSize; }
+ void setVisibility(unsigned Visibility);
+ unsigned getVisibility() const;
+ void setOther(unsigned Other);
+ unsigned getOther() const;
+ void setType(unsigned Type) const;
+ unsigned getType() const;
+ void setBinding(unsigned Binding) const;
+ unsigned getBinding() const;
+ bool isBindingSet() const;
+ void setUsedInReloc() const;
+ bool isUsedInReloc() const;
+ void setIsWeakrefUsedInReloc() const;
+ bool isWeakrefUsedInReloc() const;
+ void setIsSignature() const;
+ bool isSignature() const;
+ static bool classof(const MCSymbol *S) { return S->isELF(); }
+ void setIsBindingSet() const;
diff --git a/include/llvm/MC/MCSymbolMachO.h b/include/llvm/MC/MCSymbolMachO.h
new file mode 100644
index 0000000..166ae9e
--- /dev/null
+++ b/include/llvm/MC/MCSymbolMachO.h
@@ -0,0 +1,123 @@
+//===- MCSymbolMachO.h - ---------------------------------------*- C++ -*-===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#define setIsWeakExternal
+#include "llvm/MC/MCSymbol.h"
+namespace llvm {
+class MCSymbolMachO : public MCSymbol {
+ /// \brief We store the value for the 'desc' symbol field in the
+ /// lowest 16 bits of the implementation defined flags.
+ enum MachOSymbolFlags : uint16_t { // See <mach-o/nlist.h>.
+ SF_DescFlagsMask = 0xFFFF,
+ // Reference type flags.
+ SF_ReferenceTypeMask = 0x0007,
+ SF_ReferenceTypeUndefinedNonLazy = 0x0000,
+ SF_ReferenceTypeUndefinedLazy = 0x0001,
+ SF_ReferenceTypeDefined = 0x0002,
+ SF_ReferenceTypePrivateDefined = 0x0003,
+ SF_ReferenceTypePrivateUndefinedNonLazy = 0x0004,
+ SF_ReferenceTypePrivateUndefinedLazy = 0x0005,
+ // Other 'desc' flags.
+ SF_ThumbFunc = 0x0008,
+ SF_NoDeadStrip = 0x0020,
+ SF_WeakReference = 0x0040,
+ SF_WeakDefinition = 0x0080,
+ SF_SymbolResolver = 0x0100,
+ // Common alignment
+ SF_CommonAlignmentMask = 0xF0FF,
+ SF_CommonAlignmentShift = 8
+ };
+ MCSymbolMachO(const StringMapEntry<bool> *Name, bool isTemporary)
+ : MCSymbol(SymbolKindMachO, Name, isTemporary) {}
+ // Reference type methods.
+ void clearReferenceType() const {
+ modifyFlags(0, SF_ReferenceTypeMask);
+ }
+ void setReferenceTypeUndefinedLazy(bool Value) const {
+ modifyFlags(Value ? SF_ReferenceTypeUndefinedLazy : 0,
+ SF_ReferenceTypeUndefinedLazy);
+ }
+ // Other 'desc' methods.
+ void setThumbFunc() const {
+ modifyFlags(SF_ThumbFunc, SF_ThumbFunc);
+ }
+ bool isNoDeadStrip() const {
+ return getFlags() & SF_NoDeadStrip;
+ }
+ void setNoDeadStrip() const {
+ modifyFlags(SF_NoDeadStrip, SF_NoDeadStrip);
+ }
+ bool isWeakReference() const {
+ return getFlags() & SF_WeakReference;
+ }
+ void setWeakReference() const {
+ modifyFlags(SF_WeakReference, SF_WeakReference);
+ }
+ bool isWeakDefinition() const {
+ return getFlags() & SF_WeakDefinition;
+ }
+ void setWeakDefinition() const {
+ modifyFlags(SF_WeakDefinition, SF_WeakDefinition);
+ }
+ bool isSymbolResolver() const {
+ return getFlags() & SF_SymbolResolver;
+ }
+ void setSymbolResolver() const {
+ modifyFlags(SF_SymbolResolver, SF_SymbolResolver);
+ }
+ void setDesc(unsigned Value) const {
+ assert(Value == (Value & SF_DescFlagsMask) &&
+ "Invalid .desc value!");
+ setFlags(Value & SF_DescFlagsMask);
+ }
+ /// \brief Get the encoded value of the flags as they will be emitted in to
+ /// the MachO binary
+ uint16_t getEncodedFlags() const {
+ uint16_t Flags = getFlags();
+ // Common alignment is packed into the 'desc' bits.
+ if (isCommon()) {
+ if (unsigned Align = getCommonAlignment()) {
+ unsigned Log2Size = Log2_32(Align);
+ assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
+ if (Log2Size > 15)
+ report_fatal_error("invalid 'common' alignment '" +
+ Twine(Align) + "' for '" + getName() + "'",
+ false);
+ Flags = (Flags & SF_CommonAlignmentMask) |
+ (Log2Size << SF_CommonAlignmentShift);
+ }
+ }
+ return Flags;
+ }
+ static bool classof(const MCSymbol *S) { return S->isMachO(); }
diff --git a/include/llvm/MC/MCWinCOFFStreamer.h b/include/llvm/MC/MCWinCOFFStreamer.h
index 6a83e02..6fbc754 100644
--- a/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/include/llvm/MC/MCWinCOFFStreamer.h
@@ -50,9 +50,9 @@ public:
void EmitCOFFSymbolStorageClass(int StorageClass) override;
void EmitCOFFSymbolType(int Type) override;
void EndCOFFSymbolDef() override;
+ void EmitCOFFSafeSEH(MCSymbol const *Symbol) override;
void EmitCOFFSectionIndex(MCSymbol const *Symbol) override;
void EmitCOFFSecRel32(MCSymbol const *Symbol) override;
- void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) override;
void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h
index 6a631ff..2fb9b4a 100644
--- a/include/llvm/MC/SubtargetFeature.h
+++ b/include/llvm/MC/SubtargetFeature.h
@@ -103,6 +103,10 @@ public:
FeatureBitset ToggleFeature(FeatureBitset Bits, StringRef String,
ArrayRef<SubtargetFeatureKV> FeatureTable);
+ /// Apply the feature flag and return the newly updated feature bits.
+ FeatureBitset ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
+ ArrayRef<SubtargetFeatureKV> FeatureTable);
/// Get feature bits of a CPU.
FeatureBitset getFeatureBits(StringRef CPU,
ArrayRef<SubtargetFeatureKV> CPUTable,
diff --git a/include/llvm/Object/ArchiveWriter.h b/include/llvm/Object/ArchiveWriter.h
new file mode 100644
index 0000000..1616e46
--- /dev/null
+++ b/include/llvm/Object/ArchiveWriter.h
@@ -0,0 +1,51 @@
+//===- ArchiveWriter.h - ar archive file format writer ----------*- C++ -*-===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// Declares the writeArchive function for writing an archive file.
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Support/FileSystem.h"
+namespace llvm {
+class NewArchiveIterator {
+ bool IsNewMember;
+ StringRef Name;
+ object::Archive::child_iterator OldI;
+ StringRef NewFilename;
+ NewArchiveIterator(object::Archive::child_iterator I, StringRef Name);
+ NewArchiveIterator(StringRef I, StringRef Name);
+ NewArchiveIterator();
+ bool isNewMember() const;
+ StringRef getName() const;
+ object::Archive::child_iterator getOld() const;
+ StringRef getNew() const;
+ llvm::ErrorOr<int> getFD(sys::fs::file_status &NewStatus) const;
+ const sys::fs::file_status &getStatus() const;
+std::pair<StringRef, std::error_code>
+writeArchive(StringRef ArcName, std::vector<NewArchiveIterator> &NewMembers,
+ bool WriteSymtab);
diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h
index ccac020..564eb7a 100644
--- a/include/llvm/Object/COFF.h
+++ b/include/llvm/Object/COFF.h
@@ -613,7 +613,7 @@ protected:
StringRef &Res) const override;
std::error_code getSymbolAddress(DataRefImpl Symb,
uint64_t &Res) const override;
- std::error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const override;
+ uint64_t getSymbolSize(DataRefImpl Symb) const override;
uint32_t getSymbolFlags(DataRefImpl Symb) const override;
std::error_code getSymbolType(DataRefImpl Symb,
SymbolRef::Type &Res) const override;
@@ -647,10 +647,6 @@ protected:
getRelocationTypeName(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const override;
- std::error_code
- getRelocationValueString(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const override;
COFFObjectFile(MemoryBufferRef Object, std::error_code &EC);
basic_symbol_iterator symbol_begin_impl() const override;
@@ -699,7 +695,7 @@ public:
return object_error::parse_failed;
Res = reinterpret_cast<coff_symbol_type *>(getSymbolTable()) + Index;
- return object_error::success;
+ return std::error_code();
ErrorOr<COFFSymbolRef> getSymbol(uint32_t index) const {
if (SymbolTable16) {
@@ -722,7 +718,7 @@ public:
if (std::error_code EC = s.getError())
return EC;
Res = reinterpret_cast<const T *>(s->getRawPtr());
- return object_error::success;
+ return std::error_code();
std::error_code getSymbolName(COFFSymbolRef Symbol, StringRef &Res) const;
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index ddabf59..e87737d 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -318,7 +318,7 @@ public:
std::pair<const Elf_Shdr *, const Elf_Sym *>
getRelocationSymbol(const Elf_Shdr *RelSec, const RelT *Rel) const;
- ELFFile(StringRef Object, std::error_code &ec);
+ ELFFile(StringRef Object, std::error_code &EC);
bool isMipsELF64() const {
return Header->e_machine == ELF::EM_MIPS &&
@@ -423,12 +423,10 @@ public:
StringRef getLoadName() const;
-// Use an alignment of 2 for the typedefs since that is the worst case for
-// ELF files in archives.
-typedef ELFFile<ELFType<support::little, 2, false> > ELF32LEFile;
-typedef ELFFile<ELFType<support::little, 2, true> > ELF64LEFile;
-typedef ELFFile<ELFType<support::big, 2, false> > ELF32BEFile;
-typedef ELFFile<ELFType<support::big, 2, true> > ELF64BEFile;
+typedef ELFFile<ELFType<support::little, false>> ELF32LEFile;
+typedef ELFFile<ELFType<support::little, true>> ELF64LEFile;
+typedef ELFFile<ELFType<support::big, false>> ELF32BEFile;
+typedef ELFFile<ELFType<support::big, true>> ELF64BEFile;
// Iterate through the version definitions, and place each Elf_Verdef
// in the VersionMap according to its index.
@@ -622,7 +620,7 @@ typename ELFFile<ELFT>::uintX_t ELFFile<ELFT>::getStringTableIndex() const {
template <class ELFT>
-ELFFile<ELFT>::ELFFile(StringRef Object, std::error_code &ec)
+ELFFile<ELFT>::ELFFile(StringRef Object, std::error_code &EC)
: Buf(Object), SectionHeaderTable(nullptr), dot_shstrtab_sec(nullptr),
dot_strtab_sec(nullptr), dot_symtab_sec(nullptr),
SymbolTableSectionHeaderIndex(nullptr), dot_gnu_version_sec(nullptr),
@@ -630,9 +628,11 @@ ELFFile<ELFT>::ELFFile(StringRef Object, std::error_code &ec)
dt_soname(nullptr) {
const uint64_t FileSize = Buf.size();
- if (sizeof(Elf_Ehdr) > FileSize)
- // FIXME: Proper error handling.
- report_fatal_error("File too short!");
+ if (sizeof(Elf_Ehdr) > FileSize) {
+ // File too short!
+ EC = object_error::parse_failed;
+ return;
+ }
Header = reinterpret_cast<const Elf_Ehdr *>(base());
@@ -641,40 +641,50 @@ ELFFile<ELFT>::ELFFile(StringRef Object, std::error_code &ec)
const uint64_t SectionTableOffset = Header->e_shoff;
- if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize)
- // FIXME: Proper error handling.
- report_fatal_error("Section header table goes past end of file!");
+ if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize) {
+ // Section header table goes past end of file!
+ EC = object_error::parse_failed;
+ return;
+ }
// The getNumSections() call below depends on SectionHeaderTable being set.
SectionHeaderTable =
reinterpret_cast<const Elf_Shdr *>(base() + SectionTableOffset);
const uint64_t SectionTableSize = getNumSections() * Header->e_shentsize;
- if (SectionTableOffset + SectionTableSize > FileSize)
- // FIXME: Proper error handling.
- report_fatal_error("Section table goes past end of file!");
+ if (SectionTableOffset + SectionTableSize > FileSize) {
+ // Section table goes past end of file!
+ EC = object_error::parse_failed;
+ return;
+ }
// Scan sections for special sections.
for (const Elf_Shdr &Sec : sections()) {
switch (Sec.sh_type) {
- if (SymbolTableSectionHeaderIndex)
- // FIXME: Proper error handling.
- report_fatal_error("More than one .symtab_shndx!");
+ if (SymbolTableSectionHeaderIndex) {
+ // More than one .symtab_shndx!
+ EC = object_error::parse_failed;
+ return;
+ }
SymbolTableSectionHeaderIndex = &Sec;
- if (dot_symtab_sec)
- // FIXME: Proper error handling.
- report_fatal_error("More than one .symtab!");
+ if (dot_symtab_sec) {
+ // More than one .symtab!
+ EC = object_error::parse_failed;
+ return;
+ }
dot_symtab_sec = &Sec;
dot_strtab_sec = getSection(Sec.sh_link);
- if (DynSymRegion.Addr)
- // FIXME: Proper error handling.
- report_fatal_error("More than one .dynsym!");
+ if (DynSymRegion.Addr) {
+ // More than one .dynsym!
+ EC = object_error::parse_failed;
+ return;
+ }
DynSymRegion.Addr = base() + Sec.sh_offset;
DynSymRegion.Size = Sec.sh_size;
DynSymRegion.EntSize = Sec.sh_entsize;
@@ -685,29 +695,37 @@ ELFFile<ELFT>::ELFFile(StringRef Object, std::error_code &ec)
- if (DynamicRegion.Addr)
- // FIXME: Proper error handling.
- report_fatal_error("More than one .dynamic!");
+ if (DynamicRegion.Addr) {
+ // More than one .dynamic!
+ EC = object_error::parse_failed;
+ return;
+ }
DynamicRegion.Addr = base() + Sec.sh_offset;
DynamicRegion.Size = Sec.sh_size;
DynamicRegion.EntSize = Sec.sh_entsize;
case ELF::SHT_GNU_versym:
- if (dot_gnu_version_sec != nullptr)
- // FIXME: Proper error handling.
- report_fatal_error("More than one .gnu.version section!");
+ if (dot_gnu_version_sec != nullptr) {
+ // More than one .gnu.version section!
+ EC = object_error::parse_failed;
+ return;
+ }
dot_gnu_version_sec = &Sec;
case ELF::SHT_GNU_verdef:
- if (dot_gnu_version_d_sec != nullptr)
- // FIXME: Proper error handling.
- report_fatal_error("More than one .gnu.version_d section!");
+ if (dot_gnu_version_d_sec != nullptr) {
+ // More than one .gnu.version_d section!
+ EC = object_error::parse_failed;
+ return;
+ }
dot_gnu_version_d_sec = &Sec;
case ELF::SHT_GNU_verneed:
- if (dot_gnu_version_r_sec != nullptr)
- // FIXME: Proper error handling.
- report_fatal_error("More than one .gnu.version_r section!");
+ if (dot_gnu_version_r_sec != nullptr) {
+ // More than one .gnu.version_r section!
+ EC = object_error::parse_failed;
+ return;
+ }
dot_gnu_version_r_sec = &Sec;
@@ -744,7 +762,7 @@ ELFFile<ELFT>::ELFFile(StringRef Object, std::error_code &ec)
- ec = std::error_code();
+ EC = std::error_code();
// Get the symbol table index in the symtab section given a symbol
@@ -898,11 +916,8 @@ ErrorOr<StringRef> ELFFile<ELFT>::getSymbolName(Elf_Sym_Iter Sym) const {
template <class ELFT>
ErrorOr<StringRef> ELFFile<ELFT>::getSymbolName(const Elf_Shdr *Section,
const Elf_Sym *Symb) const {
- if (Symb->st_name == 0) {
- const Elf_Shdr *ContainingSec = getSection(Symb);
- if (ContainingSec)
- return getSectionName(ContainingSec);
- }
+ if (Symb->st_name == 0)
+ return StringRef("");
const Elf_Shdr *StrTab = getSection(Section->sh_link);
if (Symb->st_name >= StrTab->sh_size)
diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h
index 9bd4c32..78d77be 100644
--- a/include/llvm/Object/ELFObjectFile.h
+++ b/include/llvm/Object/ELFObjectFile.h
@@ -79,9 +79,8 @@ protected:
StringRef &Res) const override;
std::error_code getSymbolAddress(DataRefImpl Symb,
uint64_t &Res) const override;
- std::error_code getSymbolAlignment(DataRefImpl Symb,
- uint32_t &Res) const override;
- std::error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const override;
+ uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
+ uint64_t getSymbolSize(DataRefImpl Symb) const override;
uint32_t getSymbolFlags(DataRefImpl Symb) const override;
std::error_code getSymbolOther(DataRefImpl Symb, uint8_t &Res) const override;
std::error_code getSymbolType(DataRefImpl Symb,
@@ -119,9 +118,6 @@ protected:
getRelocationTypeName(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const override;
- std::error_code
- getRelocationValueString(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const override;
uint64_t getROffset(DataRefImpl Rel) const;
StringRef getRelocationTypeName(uint32_t Type) const;
@@ -227,7 +223,7 @@ public:
std::error_code getPlatformFlags(unsigned &Result) const override {
Result = EF.getHeader()->e_flags;
- return object_error::success;
+ return std::error_code();
const ELFFile<ELFT> *getELFFile() const { return &EF; }
@@ -244,12 +240,10 @@ public:
bool isRelocatableObject() const override;
-// Use an alignment of 2 for the typedefs since that is the worst case for
-// ELF files in archives.
-typedef ELFObjectFile<ELFType<support::little, 2, false> > ELF32LEObjectFile;
-typedef ELFObjectFile<ELFType<support::little, 2, true> > ELF64LEObjectFile;
-typedef ELFObjectFile<ELFType<support::big, 2, false> > ELF32BEObjectFile;
-typedef ELFObjectFile<ELFType<support::big, 2, true> > ELF64BEObjectFile;
+typedef ELFObjectFile<ELFType<support::little, false>> ELF32LEObjectFile;
+typedef ELFObjectFile<ELFType<support::little, true>> ELF64LEObjectFile;
+typedef ELFObjectFile<ELFType<support::big, false>> ELF32BEObjectFile;
+typedef ELFObjectFile<ELFType<support::big, true>> ELF64BEObjectFile;
template <class ELFT>
void ELFObjectFile<ELFT>::moveSymbolNext(DataRefImpl &Symb) const {
@@ -263,7 +257,7 @@ std::error_code ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Symb,
if (!Name)
return Name.getError();
Result = *Name;
- return object_error::success;
+ return std::error_code();
template <class ELFT>
@@ -277,7 +271,7 @@ std::error_code ELFObjectFile<ELFT>::getSymbolVersion(SymbolRef SymRef,
if (!Ver)
return Ver.getError();
Version = *Ver;
- return object_error::success;
+ return std::error_code();
template <class ELFT>
@@ -300,10 +294,10 @@ std::error_code ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb,
Result = UnknownAddressOrSize;
- return object_error::success;
+ return std::error_code();
case ELF::SHN_ABS:
Result = ESym->st_value;
- return object_error::success;
+ return std::error_code();
@@ -322,32 +316,27 @@ std::error_code ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb,
Result += Section->sh_addr;
- return object_error::success;
+ return std::error_code();
template <class ELFT>
-std::error_code ELFObjectFile<ELFT>::getSymbolAlignment(DataRefImpl Symb,
- uint32_t &Res) const {
+uint32_t ELFObjectFile<ELFT>::getSymbolAlignment(DataRefImpl Symb) const {
Elf_Sym_Iter Sym = toELFSymIter(Symb);
if (Sym->st_shndx == ELF::SHN_COMMON)
- Res = Sym->st_value;
- else
- Res = 0;
- return object_error::success;
+ return Sym->st_value;
+ return 0;
template <class ELFT>
-std::error_code ELFObjectFile<ELFT>::getSymbolSize(DataRefImpl Symb,
- uint64_t &Result) const {
- Result = toELFSymIter(Symb)->st_size;
- return object_error::success;
+uint64_t ELFObjectFile<ELFT>::getSymbolSize(DataRefImpl Symb) const {
+ return toELFSymIter(Symb)->st_size;
template <class ELFT>
std::error_code ELFObjectFile<ELFT>::getSymbolOther(DataRefImpl Symb,
uint8_t &Result) const {
Result = toELFSymIter(Symb)->st_other;
- return object_error::success;
+ return std::error_code();
template <class ELFT>
@@ -378,7 +367,7 @@ ELFObjectFile<ELFT>::getSymbolType(DataRefImpl Symb,
Result = SymbolRef::ST_Other;
- return object_error::success;
+ return std::error_code();
template <class ELFT>
@@ -435,7 +424,7 @@ std::error_code
ELFObjectFile<ELFT>::getSymbolSection(DataRefImpl Symb,
section_iterator &Res) const {
Res = getSymbolSection(getSymbol(Symb));
- return object_error::success;
+ return std::error_code();
template <class ELFT>
@@ -450,7 +439,7 @@ std::error_code ELFObjectFile<ELFT>::getSectionName(DataRefImpl Sec,
if (!Name)
return Name.getError();
Result = *Name;
- return object_error::success;
+ return std::error_code();
template <class ELFT>
@@ -469,7 +458,7 @@ ELFObjectFile<ELFT>::getSectionContents(DataRefImpl Sec,
StringRef &Result) const {
Elf_Shdr_Iter EShdr = toELFShdrIter(Sec);
Result = StringRef((const char *)base() + EShdr->sh_offset, EShdr->sh_size);
- return object_error::success;
+ return std::error_code();
template <class ELFT>
@@ -624,7 +613,7 @@ ELFObjectFile<ELFT>::getRelocationAddress(DataRefImpl Rel,
Result = ROffset;
- return object_error::success;
+ return std::error_code();
template <class ELFT>
@@ -634,7 +623,7 @@ ELFObjectFile<ELFT>::getRelocationOffset(DataRefImpl Rel,
assert(EF.getHeader()->e_type == ELF::ET_REL &&
"Only relocatable object files have relocation offsets");
Result = getROffset(Rel);
- return object_error::success;
+ return std::error_code();
template <class ELFT>
@@ -666,7 +655,7 @@ std::error_code ELFObjectFile<ELFT>::getRelocationType(DataRefImpl Rel,
- return object_error::success;
+ return std::error_code();
template <class ELFT>
@@ -693,7 +682,7 @@ std::error_code ELFObjectFile<ELFT>::getRelocationTypeName(
EF.getRelocationTypeName(type, Result);
- return object_error::success;
+ return std::error_code();
template <class ELFT>
@@ -706,94 +695,13 @@ ELFObjectFile<ELFT>::getRelocationAddend(DataRefImpl Rel,
report_fatal_error("Invalid section type in Rel!");
case ELF::SHT_REL: {
Result = 0;
- return object_error::success;
+ return std::error_code();
case ELF::SHT_RELA: {
Result = getRela(Rel)->r_addend;
- return object_error::success;
- }
+ return std::error_code();
-template <class ELFT>
-std::error_code ELFObjectFile<ELFT>::getRelocationValueString(
- DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
- const Elf_Shdr *sec = getRelSection(Rel);
- uint8_t type;
- StringRef res;
- int64_t addend = 0;
- uint16_t symbol_index = 0;
- switch (sec->sh_type) {
- default:
- return object_error::parse_failed;
- case ELF::SHT_REL: {
- type = getRel(Rel)->getType(EF.isMips64EL());
- symbol_index = getRel(Rel)->getSymbol(EF.isMips64EL());
- // TODO: Read implicit addend from section data.
- break;
- }
- case ELF::SHT_RELA: {
- type = getRela(Rel)->getType(EF.isMips64EL());
- symbol_index = getRela(Rel)->getSymbol(EF.isMips64EL());
- addend = getRela(Rel)->r_addend;
- break;
- }
- }
- const Elf_Sym *symb =
- EF.template getEntry<Elf_Sym>(sec->sh_link, symbol_index);
- ErrorOr<StringRef> SymName =
- EF.getSymbolName(EF.getSection(sec->sh_link), symb);
- if (!SymName)
- return SymName.getError();
- switch (EF.getHeader()->e_machine) {
- case ELF::EM_X86_64:
- switch (type) {
- case ELF::R_X86_64_PC8:
- case ELF::R_X86_64_PC16:
- case ELF::R_X86_64_PC32: {
- std::string fmtbuf;
- raw_string_ostream fmt(fmtbuf);
- fmt << *SymName << (addend < 0 ? "" : "+") << addend << "-P";
- fmt.flush();
- Result.append(fmtbuf.begin(), fmtbuf.end());
- } break;
- case ELF::R_X86_64_8:
- case ELF::R_X86_64_16:
- case ELF::R_X86_64_32:
- case ELF::R_X86_64_32S:
- case ELF::R_X86_64_64: {
- std::string fmtbuf;
- raw_string_ostream fmt(fmtbuf);
- fmt << *SymName << (addend < 0 ? "" : "+") << addend;
- fmt.flush();
- Result.append(fmtbuf.begin(), fmtbuf.end());
- } break;
- default:
- res = "Unknown";
- }
- break;
- case ELF::EM_AARCH64: {
- std::string fmtbuf;
- raw_string_ostream fmt(fmtbuf);
- fmt << *SymName;
- if (addend != 0)
- fmt << (addend < 0 ? "" : "+") << addend;
- fmt.flush();
- Result.append(fmtbuf.begin(), fmtbuf.end());
- break;
- }
- case ELF::EM_386:
- case ELF::EM_ARM:
- case ELF::EM_MIPS:
- res = *SymName;
- break;
- default:
- res = "Unknown";
- if (Result.empty())
- Result.append(res.begin(), res.end());
- return object_error::success;
template <class ELFT>
diff --git a/include/llvm/Object/ELFTypes.h b/include/llvm/Object/ELFTypes.h
index 287d367..3f323b5 100644
--- a/include/llvm/Object/ELFTypes.h
+++ b/include/llvm/Object/ELFTypes.h
@@ -10,7 +10,6 @@
-#include "llvm/Support/AlignOf.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/Endian.h"
@@ -20,95 +19,74 @@ namespace object {
using support::endianness;
-template <endianness target_endianness, std::size_t max_alignment,
- bool is64Bits>
-struct ELFType {
+template <endianness target_endianness, bool is64Bits> struct ELFType {
static const endianness TargetEndianness = target_endianness;
- static const std::size_t MaxAlignment = max_alignment;
static const bool Is64Bits = is64Bits;
-template <typename T, int max_align> struct MaximumAlignment {
- enum { value = AlignOf<T>::Alignment > max_align ? max_align
- : AlignOf<T>::Alignment
- };
+// Use an alignment of 2 for the typedefs since that is the worst case for
+// ELF files in archives.
// Templates to choose Elf_Addr and Elf_Off depending on is64Bits.
-template <endianness target_endianness, std::size_t max_alignment>
-struct ELFDataTypeTypedefHelperCommon {
+template <endianness target_endianness> struct ELFDataTypeTypedefHelperCommon {
typedef support::detail::packed_endian_specific_integral<
- uint16_t, target_endianness,
- MaximumAlignment<uint16_t, max_alignment>::value> Elf_Half;
+ uint16_t, target_endianness, 2> Elf_Half;
typedef support::detail::packed_endian_specific_integral<
- uint32_t, target_endianness,
- MaximumAlignment<uint32_t, max_alignment>::value> Elf_Word;
+ uint32_t, target_endianness, 2> Elf_Word;
typedef support::detail::packed_endian_specific_integral<
- int32_t, target_endianness,
- MaximumAlignment<int32_t, max_alignment>::value> Elf_Sword;
+ int32_t, target_endianness, 2> Elf_Sword;
typedef support::detail::packed_endian_specific_integral<
- uint64_t, target_endianness,
- MaximumAlignment<uint64_t, max_alignment>::value> Elf_Xword;
+ uint64_t, target_endianness, 2> Elf_Xword;
typedef support::detail::packed_endian_specific_integral<
- int64_t, target_endianness,
- MaximumAlignment<int64_t, max_alignment>::value> Elf_Sxword;
+ int64_t, target_endianness, 2> Elf_Sxword;
template <class ELFT> struct ELFDataTypeTypedefHelper;
/// ELF 32bit types.
-template <endianness TargetEndianness, std::size_t MaxAlign>
-struct ELFDataTypeTypedefHelper<ELFType<TargetEndianness, MaxAlign, false> >
- : ELFDataTypeTypedefHelperCommon<TargetEndianness, MaxAlign> {
+template <endianness TargetEndianness>
+struct ELFDataTypeTypedefHelper<ELFType<TargetEndianness, false>>
+ : ELFDataTypeTypedefHelperCommon<TargetEndianness> {
typedef uint32_t value_type;
typedef support::detail::packed_endian_specific_integral<
- value_type, TargetEndianness,
- MaximumAlignment<value_type, MaxAlign>::value> Elf_Addr;
+ value_type, TargetEndianness, 2> Elf_Addr;
typedef support::detail::packed_endian_specific_integral<
- value_type, TargetEndianness,
- MaximumAlignment<value_type, MaxAlign>::value> Elf_Off;
+ value_type, TargetEndianness, 2> Elf_Off;
/// ELF 64bit types.
-template <endianness TargetEndianness, std::size_t MaxAlign>
-struct ELFDataTypeTypedefHelper<ELFType<TargetEndianness, MaxAlign, true> >
- : ELFDataTypeTypedefHelperCommon<TargetEndianness, MaxAlign> {
+template <endianness TargetEndianness>
+struct ELFDataTypeTypedefHelper<ELFType<TargetEndianness, true>>
+ : ELFDataTypeTypedefHelperCommon<TargetEndianness> {
typedef uint64_t value_type;
typedef support::detail::packed_endian_specific_integral<
- value_type, TargetEndianness,
- MaximumAlignment<value_type, MaxAlign>::value> Elf_Addr;
+ value_type, TargetEndianness, 2> Elf_Addr;
typedef support::detail::packed_endian_specific_integral<
- value_type, TargetEndianness,
- MaximumAlignment<value_type, MaxAlign>::value> Elf_Off;
+ value_type, TargetEndianness, 2> Elf_Off;
// I really don't like doing this, but the alternative is copypasta.
-typedef typename ELFDataTypeTypedefHelper<ELFType<E, M, W> >::Elf_Addr \
- Elf_Addr; \
-typedef typename ELFDataTypeTypedefHelper<ELFType<E, M, W> >::Elf_Off \
- Elf_Off; \
-typedef typename ELFDataTypeTypedefHelper<ELFType<E, M, W> >::Elf_Half \
- Elf_Half; \
-typedef typename ELFDataTypeTypedefHelper<ELFType<E, M, W> >::Elf_Word \
- Elf_Word; \
-typedef typename ELFDataTypeTypedefHelper<ELFType<E, M, W> >::Elf_Sword \
- Elf_Sword; \
-typedef typename ELFDataTypeTypedefHelper<ELFType<E, M, W> >::Elf_Xword \
- Elf_Xword; \
-typedef typename ELFDataTypeTypedefHelper<ELFType<E, M, W> >::Elf_Sxword \
- Elf_Sxword;
+ typedef typename ELFDataTypeTypedefHelper<ELFType<E, W>>::Elf_Addr Elf_Addr; \
+ typedef typename ELFDataTypeTypedefHelper<ELFType<E, W>>::Elf_Off Elf_Off; \
+ typedef typename ELFDataTypeTypedefHelper<ELFType<E, W>>::Elf_Half Elf_Half; \
+ typedef typename ELFDataTypeTypedefHelper<ELFType<E, W>>::Elf_Word Elf_Word; \
+ typedef \
+ typename ELFDataTypeTypedefHelper<ELFType<E, W>>::Elf_Sword Elf_Sword; \
+ typedef \
+ typename ELFDataTypeTypedefHelper<ELFType<E, W>>::Elf_Xword Elf_Xword; \
+ typedef \
+ typename ELFDataTypeTypedefHelper<ELFType<E, W>>::Elf_Sxword Elf_Sxword;
- LLVM_ELF_IMPORT_TYPES(ELFT::TargetEndianness, ELFT::MaxAlignment, \
- ELFT::Is64Bits)
+ LLVM_ELF_IMPORT_TYPES(ELFT::TargetEndianness, ELFT::Is64Bits)
// Section header.
template <class ELFT> struct Elf_Shdr_Base;
-template <endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Shdr_Base<ELFType<TargetEndianness, MaxAlign, false> > {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
+template <endianness TargetEndianness>
+struct Elf_Shdr_Base<ELFType<TargetEndianness, false>> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, false)
Elf_Word sh_name; // Section name (index into string table)
Elf_Word sh_type; // Section type (SHT_*)
Elf_Word sh_flags; // Section flags (SHF_*)
@@ -121,9 +99,9 @@ struct Elf_Shdr_Base<ELFType<TargetEndianness, MaxAlign, false> > {
Elf_Word sh_entsize; // Size of records contained within the section
-template <endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Shdr_Base<ELFType<TargetEndianness, MaxAlign, true> > {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
+template <endianness TargetEndianness>
+struct Elf_Shdr_Base<ELFType<TargetEndianness, true>> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, true)
Elf_Word sh_name; // Section name (index into string table)
Elf_Word sh_type; // Section type (SHT_*)
Elf_Xword sh_flags; // Section flags (SHF_*)
@@ -151,9 +129,9 @@ struct Elf_Shdr_Impl : Elf_Shdr_Base<ELFT> {
template <class ELFT> struct Elf_Sym_Base;
-template <endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Sym_Base<ELFType<TargetEndianness, MaxAlign, false> > {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
+template <endianness TargetEndianness>
+struct Elf_Sym_Base<ELFType<TargetEndianness, false>> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, false)
Elf_Word st_name; // Symbol name (index into string table)
Elf_Addr st_value; // Value or address associated with the symbol
Elf_Word st_size; // Size of the symbol
@@ -162,9 +140,9 @@ struct Elf_Sym_Base<ELFType<TargetEndianness, MaxAlign, false> > {
Elf_Half st_shndx; // Which section (header table index) it's defined in
-template <endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Sym_Base<ELFType<TargetEndianness, MaxAlign, true> > {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
+template <endianness TargetEndianness>
+struct Elf_Sym_Base<ELFType<TargetEndianness, true>> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, true)
Elf_Word st_name; // Symbol name (index into string table)
unsigned char st_info; // Symbol's type and binding attributes
unsigned char st_other; // Must be zero; reserved
@@ -176,6 +154,7 @@ struct Elf_Sym_Base<ELFType<TargetEndianness, MaxAlign, true> > {
template <class ELFT>
struct Elf_Sym_Impl : Elf_Sym_Base<ELFT> {
using Elf_Sym_Base<ELFT>::st_info;
+ using Elf_Sym_Base<ELFT>::st_shndx;
using Elf_Sym_Base<ELFT>::st_other;
// These accessors and mutators correspond to the ELF32_ST_BIND,
@@ -198,6 +177,25 @@ struct Elf_Sym_Impl : Elf_Sym_Base<ELFT> {
assert(v < 4 && "Invalid value for visibility");
st_other = (st_other & ~0x3) | v;
+ bool isAbsolute() const { return st_shndx == ELF::SHN_ABS; }
+ bool isCommon() const {
+ return getType() == ELF::STT_COMMON || st_shndx == ELF::SHN_COMMON;
+ }
+ bool isDefined() const {
+ return !isUndefined() &&
+ !(st_shndx >= ELF::SHN_LORESERVE && st_shndx < ELF::SHN_ABS);
+ }
+ bool isProcessorSpecific() const {
+ return st_shndx >= ELF::SHN_LOPROC && st_shndx <= ELF::SHN_HIPROC;
+ }
+ bool isOSSpecific() const {
+ return st_shndx >= ELF::SHN_LOOS && st_shndx <= ELF::SHN_HIOS;
+ }
+ bool isReserved() const {
+ return st_shndx > ELF::SHN_HIOS && st_shndx < ELF::SHN_ABS;
+ }
+ bool isUndefined() const { return st_shndx == ELF::SHN_UNDEF; }
/// Elf_Versym: This is the structure of entries in the SHT_GNU_versym section
@@ -267,9 +265,9 @@ struct Elf_Vernaux_Impl {
/// table section (.dynamic) look like.
template <class ELFT> struct Elf_Dyn_Base;
-template <endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Dyn_Base<ELFType<TargetEndianness, MaxAlign, false> > {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
+template <endianness TargetEndianness>
+struct Elf_Dyn_Base<ELFType<TargetEndianness, false>> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, false)
Elf_Sword d_tag;
union {
Elf_Word d_val;
@@ -277,9 +275,9 @@ struct Elf_Dyn_Base<ELFType<TargetEndianness, MaxAlign, false> > {
} d_un;
-template <endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Dyn_Base<ELFType<TargetEndianness, MaxAlign, true> > {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
+template <endianness TargetEndianness>
+struct Elf_Dyn_Base<ELFType<TargetEndianness, true>> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, true)
Elf_Sxword d_tag;
union {
Elf_Xword d_val;
@@ -300,9 +298,9 @@ struct Elf_Dyn_Impl : Elf_Dyn_Base<ELFT> {
// Elf_Rel: Elf Relocation
template <class ELFT, bool isRela> struct Elf_Rel_Base;
-template <endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, false>, false> {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
+template <endianness TargetEndianness>
+struct Elf_Rel_Base<ELFType<TargetEndianness, false>, false> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, false)
Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
Elf_Word r_info; // Symbol table index and type of relocation to apply
@@ -316,9 +314,9 @@ struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, false>, false> {
-template <endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, true>, false> {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
+template <endianness TargetEndianness>
+struct Elf_Rel_Base<ELFType<TargetEndianness, true>, false> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, true)
Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
Elf_Xword r_info; // Symbol table index and type of relocation to apply
@@ -341,9 +339,9 @@ struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, true>, false> {
-template <endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, false>, true> {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
+template <endianness TargetEndianness>
+struct Elf_Rel_Base<ELFType<TargetEndianness, false>, true> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, false)
Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
Elf_Word r_info; // Symbol table index and type of relocation to apply
Elf_Sword r_addend; // Compute value for relocatable field by adding this
@@ -358,9 +356,9 @@ struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, false>, true> {
-template <endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, true>, true> {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
+template <endianness TargetEndianness>
+struct Elf_Rel_Base<ELFType<TargetEndianness, true>, true> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, true)
Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
Elf_Xword r_info; // Symbol table index and type of relocation to apply
Elf_Sxword r_addend; // Compute value for relocatable field by adding this.
@@ -386,11 +384,10 @@ struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, true>, true> {
template <class ELFT, bool isRela> struct Elf_Rel_Impl;
-template <endianness TargetEndianness, std::size_t MaxAlign, bool isRela>
-struct Elf_Rel_Impl<ELFType<TargetEndianness, MaxAlign, true>,
- isRela> : Elf_Rel_Base<
- ELFType<TargetEndianness, MaxAlign, true>, isRela> {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
+template <endianness TargetEndianness, bool isRela>
+struct Elf_Rel_Impl<ELFType<TargetEndianness, true>, isRela>
+ : Elf_Rel_Base<ELFType<TargetEndianness, true>, isRela> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, true)
// These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
// and ELF64_R_INFO macros defined in the ELF specification:
@@ -411,11 +408,10 @@ struct Elf_Rel_Impl<ELFType<TargetEndianness, MaxAlign, true>,
-template <endianness TargetEndianness, std::size_t MaxAlign, bool isRela>
-struct Elf_Rel_Impl<ELFType<TargetEndianness, MaxAlign, false>,
- isRela> : Elf_Rel_Base<
- ELFType<TargetEndianness, MaxAlign, false>, isRela> {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
+template <endianness TargetEndianness, bool isRela>
+struct Elf_Rel_Impl<ELFType<TargetEndianness, false>, isRela>
+ : Elf_Rel_Base<ELFType<TargetEndianness, false>, isRela> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, false)
// These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
// and ELF32_R_INFO macros defined in the ELF specification:
@@ -463,9 +459,9 @@ struct Elf_Ehdr_Impl {
template <class ELFT> struct Elf_Phdr_Impl;
-template <endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Phdr_Impl<ELFType<TargetEndianness, MaxAlign, false> > {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
+template <endianness TargetEndianness>
+struct Elf_Phdr_Impl<ELFType<TargetEndianness, false>> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, false)
Elf_Word p_type; // Type of segment
Elf_Off p_offset; // FileOffset where segment is located, in bytes
Elf_Addr p_vaddr; // Virtual Address of beginning of segment
@@ -476,9 +472,9 @@ struct Elf_Phdr_Impl<ELFType<TargetEndianness, MaxAlign, false> > {
Elf_Word p_align; // Segment alignment constraint
-template <endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Phdr_Impl<ELFType<TargetEndianness, MaxAlign, true> > {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
+template <endianness TargetEndianness>
+struct Elf_Phdr_Impl<ELFType<TargetEndianness, true>> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, true)
Elf_Word p_type; // Type of segment
Elf_Word p_flags; // Segment flags
Elf_Off p_offset; // FileOffset where segment is located, in bytes
@@ -493,17 +489,17 @@ struct Elf_Phdr_Impl<ELFType<TargetEndianness, MaxAlign, true> > {
template <class ELFT>
struct Elf_Mips_RegInfo;
-template <llvm::support::endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Mips_RegInfo<ELFType<TargetEndianness, MaxAlign, false>> {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
+template <llvm::support::endianness TargetEndianness>
+struct Elf_Mips_RegInfo<ELFType<TargetEndianness, false>> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, false)
Elf_Word ri_gprmask; // bit-mask of used general registers
Elf_Word ri_cprmask[4]; // bit-mask of used co-processor registers
Elf_Addr ri_gp_value; // gp register value
-template <llvm::support::endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Mips_RegInfo<ELFType<TargetEndianness, MaxAlign, true>> {
- LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
+template <llvm::support::endianness TargetEndianness>
+struct Elf_Mips_RegInfo<ELFType<TargetEndianness, true>> {
+ LLVM_ELF_IMPORT_TYPES(TargetEndianness, true)
Elf_Word ri_gprmask; // bit-mask of used general registers
Elf_Word ri_pad; // unused padding field
Elf_Word ri_cprmask[4]; // bit-mask of used co-processor registers
diff --git a/include/llvm/Object/Error.h b/include/llvm/Object/Error.h
index 90c2bd7..c9db1b8 100644
--- a/include/llvm/Object/Error.h
+++ b/include/llvm/Object/Error.h
@@ -22,12 +22,15 @@ namespace object {
const std::error_category &object_category();
enum class object_error {
- success = 0,
- arch_not_found,
+ // Error code 0 is absent. Use std::error_code() instead.
+ arch_not_found = 1,
+ macho_small_load_command,
+ macho_load_segment_too_many_sections,
+ macho_load_segment_too_small,
inline std::error_code make_error_code(object_error e) {
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index 0a9b62c..b163534 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -190,6 +190,8 @@ public:
const char *Ptr; // Where in memory the load command is.
MachO::load_command C; // The command itself.
+ typedef SmallVector<LoadCommandInfo, 4> LoadCommandList;
+ typedef LoadCommandList::const_iterator load_command_iterator;
MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits,
std::error_code &EC);
@@ -204,9 +206,8 @@ public:
std::error_code getSymbolAddress(DataRefImpl Symb,
uint64_t &Res) const override;
- std::error_code getSymbolAlignment(DataRefImpl Symb,
- uint32_t &Res) const override;
- std::error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const override;
+ uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
+ uint64_t getSymbolSize(DataRefImpl Symb) const override;
std::error_code getSymbolType(DataRefImpl Symb,
SymbolRef::Type &Res) const override;
uint32_t getSymbolFlags(DataRefImpl Symb) const override;
@@ -241,11 +242,9 @@ public:
getRelocationTypeName(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const override;
- std::error_code
- getRelocationValueString(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const override;
std::error_code getRelocationHidden(DataRefImpl Rel,
bool &Result) const override;
+ uint8_t getRelocationLength(DataRefImpl Rel) const;
// MachO specific.
std::error_code getLibraryShortNameByIndex(unsigned Index, StringRef &) const;
@@ -273,10 +272,14 @@ public:
dice_iterator begin_dices() const;
dice_iterator end_dices() const;
+ load_command_iterator begin_load_commands() const;
+ load_command_iterator end_load_commands() const;
+ iterator_range<load_command_iterator> load_commands() const;
/// For use iterating over all exported symbols.
iterator_range<export_iterator> exports() const;
/// For use examining a trie not in a MachOObjectFile.
static iterator_range<export_iterator> exports(ArrayRef<uint8_t> Trie);
@@ -329,10 +332,6 @@ public:
unsigned getAnyRelocationType(const MachO::any_relocation_info &RE) const;
SectionRef getAnyRelocationSection(const MachO::any_relocation_info &RE) const;
- // Walk load commands.
- LoadCommandInfo getFirstLoadCommandInfo() const;
- LoadCommandInfo getNextLoadCommandInfo(const LoadCommandInfo &L) const;
// MachO specific structures.
MachO::section getSection(DataRefImpl DRI) const;
MachO::section_64 getSection64(DataRefImpl DRI) const;
@@ -386,8 +385,8 @@ public:
MachO::any_relocation_info getRelocation(DataRefImpl Rel) const;
MachO::data_in_code_entry getDice(DataRefImpl Rel) const;
- MachO::mach_header getHeader() const;
- MachO::mach_header_64 getHeader64() const;
+ const MachO::mach_header &getHeader() const;
+ const MachO::mach_header_64 &getHeader64() const;
getIndirectSymbolTableEntry(const MachO::dysymtab_command &DLC,
unsigned Index) const;
@@ -430,10 +429,15 @@ public:
+ union {
+ MachO::mach_header_64 Header64;
+ MachO::mach_header Header;
+ };
typedef SmallVector<const char*, 1> SectionList;
SectionList Sections;
typedef SmallVector<const char*, 1> LibraryList;
LibraryList Libraries;
+ LoadCommandList LoadCommands;
typedef SmallVector<StringRef, 1> LibraryShortName;
mutable LibraryShortName LibrariesShortNames;
const char *SymtabLoadCmd;
@@ -472,7 +476,7 @@ inline std::error_code DiceRef::getOffset(uint32_t &Result) const {
static_cast<const MachOObjectFile *>(OwningObject);
MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl);
Result = Dice.offset;
- return object_error::success;
+ return std::error_code();
inline std::error_code DiceRef::getLength(uint16_t &Result) const {
@@ -480,7 +484,7 @@ inline std::error_code DiceRef::getLength(uint16_t &Result) const {
static_cast<const MachOObjectFile *>(OwningObject);
MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl);
Result = Dice.length;
- return object_error::success;
+ return std::error_code();
inline std::error_code DiceRef::getKind(uint16_t &Result) const {
@@ -488,7 +492,7 @@ inline std::error_code DiceRef::getKind(uint16_t &Result) const {
static_cast<const MachOObjectFile *>(OwningObject);
MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl);
Result = Dice.kind;
- return object_error::success;
+ return std::error_code();
inline DataRefImpl DiceRef::getRawDataRefImpl() const {
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index 14cd082..a1ae19e 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -66,11 +66,6 @@ public:
/// This is for display purposes only.
std::error_code getTypeName(SmallVectorImpl<char> &Result) const;
- /// @brief Get a string that represents the calculation of the value of this
- /// relocation.
- ///
- /// This is for display purposes only.
- std::error_code getValueString(SmallVectorImpl<char> &Result) const;
DataRefImpl getRawDataRefImpl() const;
const ObjectFile *getObjectFile() const;
@@ -146,8 +141,8 @@ public:
/// mapped).
std::error_code getAddress(uint64_t &Result) const;
/// @brief Get the alignment of this symbol as the actual value (not log 2).
- std::error_code getAlignment(uint32_t &Result) const;
- std::error_code getSize(uint64_t &Result) const;
+ uint32_t getAlignment() const;
+ uint64_t getSize() const;
std::error_code getType(SymbolRef::Type &Result) const;
std::error_code getOther(uint8_t &Result) const;
@@ -206,10 +201,8 @@ protected:
DataRefImpl Symb) const override;
virtual std::error_code getSymbolAddress(DataRefImpl Symb,
uint64_t &Res) const = 0;
- virtual std::error_code getSymbolAlignment(DataRefImpl Symb,
- uint32_t &Res) const;
- virtual std::error_code getSymbolSize(DataRefImpl Symb,
- uint64_t &Res) const = 0;
+ virtual uint32_t getSymbolAlignment(DataRefImpl Symb) const;
+ virtual uint64_t getSymbolSize(DataRefImpl Symb) const = 0;
virtual std::error_code getSymbolType(DataRefImpl Symb,
SymbolRef::Type &Res) const = 0;
virtual std::error_code getSymbolSection(DataRefImpl Symb,
@@ -254,13 +247,10 @@ protected:
virtual std::error_code
getRelocationTypeName(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const = 0;
- virtual std::error_code
- getRelocationValueString(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const = 0;
virtual std::error_code getRelocationHidden(DataRefImpl Rel,
bool &Result) const {
Result = false;
- return object_error::success;
+ return std::error_code();
@@ -334,12 +324,12 @@ inline std::error_code SymbolRef::getAddress(uint64_t &Result) const {
return getObject()->getSymbolAddress(getRawDataRefImpl(), Result);
-inline std::error_code SymbolRef::getAlignment(uint32_t &Result) const {
- return getObject()->getSymbolAlignment(getRawDataRefImpl(), Result);
+inline uint32_t SymbolRef::getAlignment() const {
+ return getObject()->getSymbolAlignment(getRawDataRefImpl());
-inline std::error_code SymbolRef::getSize(uint64_t &Result) const {
- return getObject()->getSymbolSize(getRawDataRefImpl(), Result);
+inline uint64_t SymbolRef::getSize() const {
+ return getObject()->getSymbolSize(getRawDataRefImpl());
inline std::error_code SymbolRef::getSection(section_iterator &Result) const {
@@ -482,11 +472,6 @@ RelocationRef::getTypeName(SmallVectorImpl<char> &Result) const {
return OwningObject->getRelocationTypeName(RelocationPimpl, Result);
-inline std::error_code
-RelocationRef::getValueString(SmallVectorImpl<char> &Result) const {
- return OwningObject->getRelocationValueString(RelocationPimpl, Result);
inline std::error_code RelocationRef::getHidden(bool &Result) const {
return OwningObject->getRelocationHidden(RelocationPimpl, Result);
diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h
index 91eafd5..02ffda5 100644
--- a/include/llvm/Object/RelocVisitor.h
+++ b/include/llvm/Object/RelocVisitor.h
@@ -19,9 +19,11 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -52,6 +54,8 @@ public:
return visitELF(RelocType, R, Value);
if (isa<COFFObjectFile>(ObjToVisit))
return visitCOFF(RelocType, R, Value);
+ if (isa<MachOObjectFile>(ObjToVisit))
+ return visitMachO(RelocType, R, Value);
HasError = true;
return RelocToApply();
@@ -221,6 +225,20 @@ private:
return RelocToApply();
+ RelocToApply visitMachO(uint32_t RelocType, RelocationRef R, uint64_t Value) {
+ switch (ObjToVisit.getArch()) {
+ default: break;
+ case Triple::x86_64:
+ switch (RelocType) {
+ default: break;
+ case MachO::X86_64_RELOC_UNSIGNED:
+ return visitMACHO_X86_64_UNSIGNED(R, Value);
+ }
+ }
+ HasError = true;
+ return RelocToApply();
+ }
int64_t getELFAddend32LE(RelocationRef R) {
const ELF32LEObjectFile *Obj = cast<ELF32LEObjectFile>(R.getObjectFile());
DataRefImpl DRI = R.getRawDataRefImpl();
@@ -252,6 +270,12 @@ private:
Obj->getRelocationAddend(DRI, Addend);
return Addend;
+ uint8_t getLengthMachO64(RelocationRef R) {
+ const MachOObjectFile *Obj = cast<MachOObjectFile>(R.getObjectFile());
+ return Obj->getRelocationLength(R.getRawDataRefImpl());
+ }
/// Operations
/// 386-ELF
@@ -413,6 +437,13 @@ private:
RelocToApply visitCOFF_AMD64_ADDR64(RelocationRef R, uint64_t Value) {
return RelocToApply(Value, /*Width=*/8);
+ // X86_64 MachO
+ RelocToApply visitMACHO_X86_64_UNSIGNED(RelocationRef R, uint64_t Value) {
+ uint8_t Length = getLengthMachO64(R);
+ Length = 1<<Length;
+ return RelocToApply(Value, Length);
+ }
diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h
index 7f54822..b26af61 100644
--- a/include/llvm/Support/COFF.h
+++ b/include/llvm/Support/COFF.h
@@ -155,16 +155,6 @@ namespace COFF {
uint8_t NumberOfAuxSymbols;
- enum SymbolFlags {
- SF_TypeMask = 0x0000FFFF,
- SF_TypeShift = 0,
- SF_ClassMask = 0x00FF0000,
- SF_ClassShift = 16,
- SF_WeakExternal = 0x01000000
- };
enum SymbolSectionNumber : int32_t {
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index bd1d1cb..1ad8a3b 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -1284,24 +1284,81 @@ public:
-// Define how to hold a class type object, such as a string. Since we can
-// inherit from a class, we do so. This makes us exactly compatible with the
-// object in all cases that it is used.
+// Define how to hold a class type object, such as a string.
+// Originally this code inherited from std::vector. In transitioning to a new
+// API for command line options we should change this. The new implementation
+// of this list_storage specialization implements the minimum subset of the
+// std::vector API required for all the current clients.
-template <class DataType>
-class list_storage<DataType, bool> : public std::vector<DataType> {
+// FIXME: Reduce this API to a more narrow subset of std::vector
+template <class DataType> class list_storage<DataType, bool> {
+ std::vector<DataType> Storage;
- template <class T> void addValue(const T &V) {
- std::vector<DataType>::push_back(V);
+ typedef typename std::vector<DataType>::iterator iterator;
+ iterator begin() { return Storage.begin(); }
+ iterator end() { return Storage.end(); }
+ typedef typename std::vector<DataType>::const_iterator const_iterator;
+ const_iterator begin() const { return Storage.begin(); }
+ const_iterator end() const { return Storage.end(); }
+ typedef typename std::vector<DataType>::size_type size_type;
+ size_type size() const { return Storage.size(); }
+ bool empty() const { return Storage.empty(); }
+ void push_back(const DataType &value) { Storage.push_back(value); }
+ void push_back(DataType &&value) { Storage.push_back(value); }
+ typedef typename std::vector<DataType>::reference reference;
+ typedef typename std::vector<DataType>::const_reference const_reference;
+ reference operator[](size_type pos) { return Storage[pos]; }
+ const_reference operator[](size_type pos) const { return Storage[pos]; }
+ iterator erase(const_iterator pos) { return Storage.erase(pos); }
+ iterator erase(const_iterator first, const_iterator last) {
+ return Storage.erase(first, last);
+ }
+ iterator erase(iterator pos) { return Storage.erase(pos); }
+ iterator erase(iterator first, iterator last) {
+ return Storage.erase(first, last);
+ iterator insert(const_iterator pos, const DataType &value) {
+ return Storage.insert(pos, value);
+ }
+ iterator insert(const_iterator pos, DataType &&value) {
+ return Storage.insert(pos, value);
+ }
+ iterator insert(iterator pos, const DataType &value) {
+ return Storage.insert(pos, value);
+ }
+ iterator insert(iterator pos, DataType &&value) {
+ return Storage.insert(pos, value);
+ }
+ reference front() { return Storage.front(); }
+ const_reference front() const { return Storage.front(); }
+ operator std::vector<DataType>&() { return Storage; }
+ operator ArrayRef<DataType>() { return Storage; }
+ std::vector<DataType> *operator&() { return &Storage; }
+ const std::vector<DataType> *operator&() const { return &Storage; }
+ template <class T> void addValue(const T &V) { Storage.push_back(V); }
// list - A list of command line options.
-template <class DataType, class Storage = bool,
+template <class DataType, class StorageClass = bool,
class ParserClass = parser<DataType>>
-class list : public Option, public list_storage<DataType, Storage> {
+class list : public Option, public list_storage<DataType, StorageClass> {
std::vector<unsigned> Positions;
ParserClass Parser;
@@ -1319,7 +1376,7 @@ class list : public Option, public list_storage<DataType, Storage> {
typename ParserClass::parser_data_type();
if (Parser.parse(*this, ArgName, Arg, Val))
return true; // Parse Error!
- list_storage<DataType, Storage>::addValue(Val);
+ list_storage<DataType, StorageClass>::addValue(Val);
return false;
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index c81fbaf..67ef23d 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -350,19 +350,6 @@
-/// \brief Is unaligned memory access fast on the host machine.
-/// Don't specialize on alignment for platforms where unaligned memory accesses
-/// generates the same code as aligned memory accesses for common types.
-#if defined(_M_AMD64) || defined(_M_IX86) || defined(__amd64) || \
- defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || \
- defined(_X86_) || defined(__i386) || defined(__i386__)
/// \brief Mark debug helper function definitions like dump() that should not be
/// stripped from debug builds.
// FIXME: Move this to a private config.h as it's not usable in public headers.
diff --git a/include/llvm/Support/ELFRelocs/Hexagon.def b/include/llvm/Support/ELFRelocs/Hexagon.def
index c9d35b8..a698ecb 100644
--- a/include/llvm/Support/ELFRelocs/Hexagon.def
+++ b/include/llvm/Support/ELFRelocs/Hexagon.def
@@ -90,3 +90,11 @@ ELF_RELOC(R_HEX_IE_GOT_11_X, 82)
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index e316616..2cf7e0e 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -562,7 +562,7 @@ inline uint64_t MinAlign(uint64_t A, uint64_t B) {
/// Alignment should be a power of two. This method rounds up, so
/// alignAddr(7, 4) == 8 and alignAddr(8, 4) == 8.
-inline uintptr_t alignAddr(void *Addr, size_t Alignment) {
+inline uintptr_t alignAddr(const void *Addr, size_t Alignment) {
assert(Alignment && isPowerOf2_64((uint64_t)Alignment) &&
"Alignment is not a power of two!");
@@ -573,7 +573,7 @@ inline uintptr_t alignAddr(void *Addr, size_t Alignment) {
/// \brief Returns the necessary adjustment for aligning \c Ptr to \c Alignment
/// bytes, rounding up.
-inline size_t alignmentAdjustment(void *Ptr, size_t Alignment) {
+inline size_t alignmentAdjustment(const void *Ptr, size_t Alignment) {
return alignAddr(Ptr, Alignment) - (uintptr_t)Ptr;
diff --git a/include/llvm/Support/TargetParser.h b/include/llvm/Support/TargetParser.h
index ca626f2..777ee20 100644
--- a/include/llvm/Support/TargetParser.h
+++ b/include/llvm/Support/TargetParser.h
@@ -15,6 +15,10 @@
+// FIXME: vector is used because that's what clang uses for subtarget feature
+// lists, but SmallVector would probably be better
+#include <vector>
namespace llvm {
class StringRef;
@@ -28,13 +32,16 @@ namespace ARM {
// FPU names.
enum FPUKind {
+ FK_FPV4_SP_D16,
+ FK_FPV5_SP_D16,
@@ -44,6 +51,20 @@ namespace ARM {
+ // An FPU name implies one of three levels of Neon support:
+ enum NeonSupportLevel {
+ NS_None = 0, ///< No Neon
+ NS_Neon, ///< Neon
+ NS_Crypto ///< Neon with Crypto
+ };
+ // An FPU name restricts the FPU in one of three ways:
+ enum FPURestriction {
+ FR_None = 0, ///< No restriction
+ FR_D16, ///< Only 16 D registers
+ FR_SP_D16 ///< Only single-precision instructions, with 16 D registers
+ };
// Arch names.
enum ArchKind {
@@ -53,34 +74,34 @@ namespace ARM {
// Non-standard Arch names.
@@ -92,8 +113,15 @@ namespace ARM {
+ // Unsupported extensions.
@@ -132,9 +160,16 @@ public:
// Information by ID
static const char * getFPUName(unsigned FPUKind);
+ static unsigned getFPUVersion(unsigned FPUKind);
+ static unsigned getFPUNeonSupportLevel(unsigned FPUKind);
+ static unsigned getFPURestriction(unsigned FPUKind);
+ // FIXME: This should be moved to TargetTuple once it exists
+ static bool getFPUFeatures(unsigned FPUKind,
+ std::vector<const char*> &Features);
static const char * getArchName(unsigned ArchKind);
- static unsigned getArchDefaultCPUArch(unsigned ArchKind);
- static const char * getArchDefaultCPUName(unsigned ArchKind);
+ static unsigned getArchAttr(unsigned ArchKind);
+ static const char * getCPUAttr(unsigned ArchKind);
+ static const char * getSubArch(unsigned ArchKind);
static const char * getArchExtName(unsigned ArchExtKind);
static const char * getDefaultCPU(StringRef Arch);
diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h
index 408e908..837fc66 100644
--- a/include/llvm/Support/TargetRegistry.h
+++ b/include/llvm/Support/TargetRegistry.h
@@ -91,7 +91,7 @@ public:
typedef bool (*ArchMatchFnTy)(Triple::ArchType Arch);
typedef MCAsmInfo *(*MCAsmInfoCtorFnTy)(const MCRegisterInfo &MRI,
- StringRef TT);
+ const Triple &TT);
typedef MCCodeGenInfo *(*MCCodeGenInfoCtorFnTy)(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -287,15 +287,15 @@ public:
/// createMCAsmInfo - Create a MCAsmInfo implementation for the specified
/// target triple.
- /// \param Triple This argument is used to determine the target machine
+ /// \param TheTriple This argument is used to determine the target machine
/// feature set; it should always be provided. Generally this should be
/// either the target triple from the module, or the target triple of the
/// host if that does not exist.
MCAsmInfo *createMCAsmInfo(const MCRegisterInfo &MRI,
- StringRef Triple) const {
+ StringRef TheTriple) const {
if (!MCAsmInfoCtorFn)
return nullptr;
- return MCAsmInfoCtorFn(MRI, Triple);
+ return MCAsmInfoCtorFn(MRI, Triple(TheTriple));
/// createMCCodeGenInfo - Create a MCCodeGenInfo implementation.
@@ -889,7 +889,8 @@ template <class MCAsmInfoImpl> struct RegisterMCAsmInfo {
- static MCAsmInfo *Allocator(const MCRegisterInfo & /*MRI*/, StringRef TT) {
+ static MCAsmInfo *Allocator(const MCRegisterInfo & /*MRI*/,
+ const Triple &TT) {
return new MCAsmInfoImpl(TT);
diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h
index 3bdff20..c04294a 100644
--- a/include/llvm/Support/YAMLTraits.h
+++ b/include/llvm/Support/YAMLTraits.h
@@ -1090,6 +1090,9 @@ public:
bool setCurrentDocument();
bool nextDocument();
+ /// Returns the current node that's being parsed by the YAML Parser.
+ const Node *getCurrentNode() const;
llvm::SourceMgr SrcMgr; // must be before Strm
std::unique_ptr<llvm::yaml::Stream> Strm;
@@ -1111,7 +1114,7 @@ private:
class Output : public IO {
- Output(llvm::raw_ostream &, void *Ctxt=nullptr);
+ Output(llvm::raw_ostream &, void *Ctxt = nullptr, int WrapColumn = 70);
~Output() override;
bool outputting() override;
@@ -1167,6 +1170,7 @@ private:
llvm::raw_ostream &Out;
+ int WrapColumn;
SmallVector<InState, 8> StateStack;
int Column;
int ColumnAtFlowStart;
diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h
index 7576555..14ad636 100644
--- a/include/llvm/TableGen/Record.h
+++ b/include/llvm/TableGen/Record.h
@@ -17,48 +17,20 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/Support/Allocator.h"
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
namespace llvm {
-// RecTy subclasses.
-class BitRecTy;
-class BitsRecTy;
-class IntRecTy;
-class StringRecTy;
class ListRecTy;
-class DagRecTy;
-class RecordRecTy;
-// Init subclasses.
-class Init;
-class UnsetInit;
-class BitInit;
-class BitsInit;
-class IntInit;
-class StringInit;
-class ListInit;
-class UnOpInit;
-class BinOpInit;
-class TernOpInit;
-class DefInit;
-class DagInit;
-class TypedInit;
-class VarInit;
-class FieldInit;
-class VarBitInit;
-class VarListElementInit;
-// Other classes.
+struct MultiClass;
class Record;
class RecordVal;
-struct MultiClass;
class RecordKeeper;
@@ -81,12 +53,11 @@ public:
RecTyKind Kind;
std::unique_ptr<ListRecTy> ListTy;
- virtual void anchor();
RecTyKind getRecTyKind() const { return Kind; }
- RecTy(RecTyKind K) : Kind(K), ListTy(nullptr) {}
+ RecTy(RecTyKind K) : Kind(K) {}
virtual ~RecTy() {}
virtual std::string getAsString() const = 0;
@@ -95,40 +66,10 @@ public:
/// typeIsConvertibleTo - Return true if all values of 'this' type can be
/// converted to the specified type.
- virtual bool typeIsConvertibleTo(const RecTy *RHS) const = 0;
+ virtual bool typeIsConvertibleTo(const RecTy *RHS) const;
/// getListTy - Returns the type representing list<this>.
ListRecTy *getListTy();
-public: // These methods should only be called from subclasses of Init
- virtual Init *convertValue( UnsetInit *UI) { return nullptr; }
- virtual Init *convertValue( BitInit *BI) { return nullptr; }
- virtual Init *convertValue( BitsInit *BI) { return nullptr; }
- virtual Init *convertValue( IntInit *II) { return nullptr; }
- virtual Init *convertValue(StringInit *SI) { return nullptr; }
- virtual Init *convertValue( ListInit *LI) { return nullptr; }
- virtual Init *convertValue( UnOpInit *UO) {
- return convertValue((TypedInit*)UO);
- }
- virtual Init *convertValue( BinOpInit *BO) {
- return convertValue((TypedInit*)BO);
- }
- virtual Init *convertValue( TernOpInit *TO) {
- return convertValue((TypedInit*)TO);
- }
- virtual Init *convertValue(VarBitInit *VB) { return nullptr; }
- virtual Init *convertValue( DefInit *DI) { return nullptr; }
- virtual Init *convertValue( DagInit *DI) { return nullptr; }
- virtual Init *convertValue( TypedInit *TI) { return nullptr; }
- virtual Init *convertValue( VarInit *VI) {
- return convertValue((TypedInit*)VI);
- }
- virtual Init *convertValue( FieldInit *FI) {
- return convertValue((TypedInit*)FI);
- }
- virtual bool baseClassOf(const RecTy*) const;
inline raw_ostream &operator<<(raw_ostream &OS, const RecTy &Ty) {
@@ -149,20 +90,9 @@ public:
static BitRecTy *get() { return &Shared; }
- using RecTy::convertValue;
- Init *convertValue( UnsetInit *UI) override { return (Init*)UI; }
- Init *convertValue( BitInit *BI) override { return (Init*)BI; }
- Init *convertValue( BitsInit *BI) override;
- Init *convertValue( IntInit *II) override;
- Init *convertValue(VarBitInit *VB) override { return (Init*)VB; }
- Init *convertValue( TypedInit *TI) override;
std::string getAsString() const override { return "bit"; }
- bool typeIsConvertibleTo(const RecTy *RHS) const override {
- return RHS->baseClassOf(this);
- }
- bool baseClassOf(const RecTy*) const override;
+ bool typeIsConvertibleTo(const RecTy *RHS) const override;
/// BitsRecTy - 'bits<n>' - Represent a fixed number of bits
@@ -180,19 +110,9 @@ public:
unsigned getNumBits() const { return Size; }
- using RecTy::convertValue;
- Init *convertValue( UnsetInit *UI) override;
- Init *convertValue( BitInit *UI) override;
- Init *convertValue( BitsInit *BI) override;
- Init *convertValue( IntInit *II) override;
- Init *convertValue( TypedInit *TI) override;
std::string getAsString() const override;
- bool typeIsConvertibleTo(const RecTy *RHS) const override {
- return RHS->baseClassOf(this);
- }
- bool baseClassOf(const RecTy*) const override;
+ bool typeIsConvertibleTo(const RecTy *RHS) const override;
/// IntRecTy - 'int' - Represent an integer value of no particular size
@@ -208,20 +128,9 @@ public:
static IntRecTy *get() { return &Shared; }
- using RecTy::convertValue;
- Init *convertValue( UnsetInit *UI) override { return (Init*)UI; }
- Init *convertValue( BitInit *BI) override;
- Init *convertValue( BitsInit *BI) override;
- Init *convertValue( IntInit *II) override { return (Init*)II; }
- Init *convertValue( TypedInit *TI) override;
std::string getAsString() const override { return "int"; }
- bool typeIsConvertibleTo(const RecTy *RHS) const override {
- return RHS->baseClassOf(this);
- }
- bool baseClassOf(const RecTy*) const override;
+ bool typeIsConvertibleTo(const RecTy *RHS) const override;
/// StringRecTy - 'string' - Represent an string value
@@ -237,18 +146,7 @@ public:
static StringRecTy *get() { return &Shared; }
- using RecTy::convertValue;
- Init *convertValue( UnsetInit *UI) override { return (Init*)UI; }
- Init *convertValue(StringInit *SI) override { return (Init*)SI; }
- Init *convertValue( UnOpInit *UO) override;
- Init *convertValue( BinOpInit *BO) override;
- Init *convertValue( TypedInit *TI) override;
- std::string getAsString() const override { return "string"; }
- bool typeIsConvertibleTo(const RecTy *RHS) const override {
- return RHS->baseClassOf(this);
- }
+ std::string getAsString() const override;
/// ListRecTy - 'list<Ty>' - Represent a list of values, all of which must be of
@@ -267,18 +165,9 @@ public:
static ListRecTy *get(RecTy *T) { return T->getListTy(); }
RecTy *getElementType() const { return Ty; }
- using RecTy::convertValue;
- Init *convertValue( UnsetInit *UI) override { return (Init*)UI; }
- Init *convertValue( ListInit *LI) override;
- Init *convertValue( TypedInit *TI) override;
std::string getAsString() const override;
- bool typeIsConvertibleTo(const RecTy *RHS) const override {
- return RHS->baseClassOf(this);
- }
- bool baseClassOf(const RecTy*) const override;
+ bool typeIsConvertibleTo(const RecTy *RHS) const override;
/// DagRecTy - 'dag' - Represent a dag fragment
@@ -294,18 +183,7 @@ public:
static DagRecTy *get() { return &Shared; }
- using RecTy::convertValue;
- Init *convertValue( UnsetInit *UI) override { return (Init*)UI; }
- Init *convertValue( UnOpInit *UO) override;
- Init *convertValue( BinOpInit *BO) override;
- Init *convertValue( DagInit *DI) override { return (Init*)DI; }
- Init *convertValue( TypedInit *TI) override;
- std::string getAsString() const override { return "dag"; }
- bool typeIsConvertibleTo(const RecTy *RHS) const override {
- return RHS->baseClassOf(this);
- }
+ std::string getAsString() const override;
/// RecordRecTy - '[classname]' - Represent an instance of a class, such as:
@@ -325,17 +203,9 @@ public:
Record *getRecord() const { return Rec; }
- using RecTy::convertValue;
- Init *convertValue( UnsetInit *UI) override { return (Init*)UI; }
- Init *convertValue( DefInit *DI) override;
- Init *convertValue( TypedInit *TI) override;
std::string getAsString() const override;
- bool typeIsConvertibleTo(const RecTy *RHS) const override {
- return RHS->baseClassOf(this);
- }
- bool baseClassOf(const RecTy*) const override;
+ bool typeIsConvertibleTo(const RecTy *RHS) const override;
/// resolveTypes - Find a common type that T1 and T2 convert to.
@@ -418,10 +288,8 @@ public:
/// invokes print on stderr.
void dump() const;
- /// convertInitializerTo - This virtual function is a simple call-back
- /// function that should be overridden to call the appropriate
- /// RecTy::convertValue method.
- ///
+ /// convertInitializerTo - This virtual function converts to the appropriate
+ /// Init based on the passed in type.
virtual Init *convertInitializerTo(RecTy *Ty) const = 0;
/// convertInitializerBitRange - This method is used to implement the bitrange
@@ -511,6 +379,8 @@ public:
RecTy *getType() const { return Ty; }
+ Init *convertInitializerTo(RecTy *Ty) const override;
Init *
convertInitializerBitRange(const std::vector<unsigned> &Bits) const override;
Init *
@@ -535,7 +405,6 @@ class UnsetInit : public Init {
UnsetInit() : Init(IK_UnsetInit) {}
UnsetInit(const UnsetInit &) = delete;
UnsetInit &operator=(const UnsetInit &Other) = delete;
- void anchor() override;
static bool classof(const Init *I) {
@@ -543,9 +412,7 @@ public:
static UnsetInit *get();
- Init *convertInitializerTo(RecTy *Ty) const override {
- return Ty->convertValue(const_cast<UnsetInit *>(this));
- }
+ Init *convertInitializerTo(RecTy *Ty) const override;
Init *getBit(unsigned Bit) const override {
return const_cast<UnsetInit*>(this);
@@ -563,7 +430,6 @@ class BitInit : public Init {
explicit BitInit(bool V) : Init(IK_BitInit), Value(V) {}
BitInit(const BitInit &Other) = delete;
BitInit &operator=(BitInit &Other) = delete;
- void anchor() override;
static bool classof(const Init *I) {
@@ -573,9 +439,7 @@ public:
bool getValue() const { return Value; }
- Init *convertInitializerTo(RecTy *Ty) const override {
- return Ty->convertValue(const_cast<BitInit *>(this));
- }
+ Init *convertInitializerTo(RecTy *Ty) const override;
Init *getBit(unsigned Bit) const override {
assert(Bit < 1 && "Bit index out of range!");
@@ -608,9 +472,7 @@ public:
unsigned getNumBits() const { return Bits.size(); }
- Init *convertInitializerTo(RecTy *Ty) const override {
- return Ty->convertValue(const_cast<BitsInit *>(this));
- }
+ Init *convertInitializerTo(RecTy *Ty) const override;
Init *
convertInitializerBitRange(const std::vector<unsigned> &Bits) const override;
@@ -661,9 +523,7 @@ public:
int64_t getValue() const { return Value; }
- Init *convertInitializerTo(RecTy *Ty) const override {
- return Ty->convertValue(const_cast<IntInit *>(this));
- }
+ Init *convertInitializerTo(RecTy *Ty) const override;
Init *
convertInitializerBitRange(const std::vector<unsigned> &Bits) const override;
@@ -692,7 +552,6 @@ class StringInit : public TypedInit {
StringInit(const StringInit &Other) = delete;
StringInit &operator=(const StringInit &Other) = delete;
- void anchor() override;
static bool classof(const Init *I) {
@@ -702,9 +561,7 @@ public:
const std::string &getValue() const { return Value; }
- Init *convertInitializerTo(RecTy *Ty) const override {
- return Ty->convertValue(const_cast<StringInit *>(this));
- }
+ Init *convertInitializerTo(RecTy *Ty) const override;
std::string getAsString() const override { return "\"" + Value + "\""; }
std::string getAsUnquotedString() const override { return Value; }
@@ -746,7 +603,6 @@ public:
void Profile(FoldingSetNodeID &ID) const;
- unsigned getSize() const { return Values.size(); }
Init *getElement(unsigned i) const {
assert(i < Values.size() && "List element index out of range!");
return Values[i];
@@ -757,9 +613,7 @@ public:
Init *
convertInitListSlice(const std::vector<unsigned> &Elements) const override;
- Init *convertInitializerTo(RecTy *Ty) const override {
- return Ty->convertValue(const_cast<ListInit *>(this));
- }
+ Init *convertInitializerTo(RecTy *Ty) const override;
/// resolveReferences - This method is used by classes that refer to other
/// variables which may not be defined at the time they expression is formed.
@@ -772,10 +626,11 @@ public:
ArrayRef<Init*> getValues() const { return Values; }
- inline const_iterator begin() const { return Values.begin(); }
- inline const_iterator end () const { return Values.end(); }
+ const_iterator begin() const { return Values.begin(); }
+ const_iterator end () const { return Values.end(); }
- inline bool empty() const { return Values.empty(); }
+ size_t size () const { return Values.size(); }
+ bool empty() const { return Values.empty(); }
/// resolveListElementReference - This method is used to implement
/// VarListElementInit::resolveReferences. If the list element is resolvable
@@ -805,17 +660,13 @@ public:
// Clone - Clone this operator, replacing arguments with the new list
virtual OpInit *clone(std::vector<Init *> &Operands) const = 0;
- virtual int getNumOperands() const = 0;
- virtual Init *getOperand(int i) const = 0;
+ virtual unsigned getNumOperands() const = 0;
+ virtual Init *getOperand(unsigned i) const = 0;
// Fold - If possible, fold this to a simpler init. Return this if not
// possible to fold.
virtual Init *Fold(Record *CurRec, MultiClass *CurMultiClass) const = 0;
- Init *convertInitializerTo(RecTy *Ty) const override {
- return Ty->convertValue(const_cast<OpInit *>(this));
- }
Init *resolveListElementReference(Record &R, const RecordVal *RV,
unsigned Elt) const override;
@@ -851,8 +702,8 @@ public:
return UnOpInit::get(getOpcode(), *Operands.begin(), getType());
- int getNumOperands() const override { return 1; }
- Init *getOperand(int i) const override {
+ unsigned getNumOperands() const override { return 1; }
+ Init *getOperand(unsigned i) const override {
assert(i == 0 && "Invalid operand id for unary operator");
return getOperand();
@@ -899,13 +750,12 @@ public:
return BinOpInit::get(getOpcode(), Operands[0], Operands[1], getType());
- int getNumOperands() const override { return 2; }
- Init *getOperand(int i) const override {
- assert((i == 0 || i == 1) && "Invalid operand id for binary operator");
- if (i == 0) {
- return getLHS();
- } else {
- return getRHS();
+ unsigned getNumOperands() const override { return 2; }
+ Init *getOperand(unsigned i) const override {
+ switch (i) {
+ default: llvm_unreachable("Invalid operand id for binary operator");
+ case 0: return getLHS();
+ case 1: return getRHS();
@@ -955,16 +805,13 @@ public:
- int getNumOperands() const override { return 3; }
- Init *getOperand(int i) const override {
- assert((i == 0 || i == 1 || i == 2) &&
- "Invalid operand id for ternary operator");
- if (i == 0) {
- return getLHS();
- } else if (i == 1) {
- return getMHS();
- } else {
- return getRHS();
+ unsigned getNumOperands() const override { return 3; }
+ Init *getOperand(unsigned i) const override {
+ switch (i) {
+ default: llvm_unreachable("Invalid operand id for ternary operator");
+ case 0: return getLHS();
+ case 1: return getMHS();
+ case 2: return getRHS();
@@ -1004,10 +851,6 @@ public:
static VarInit *get(const std::string &VN, RecTy *T);
static VarInit *get(Init *VN, RecTy *T);
- Init *convertInitializerTo(RecTy *Ty) const override {
- return Ty->convertValue(const_cast<VarInit *>(this));
- }
const std::string &getName() const;
Init *getNameInit() const { return VarName; }
std::string getNameInitAsString() const {
@@ -1056,9 +899,7 @@ public:
static VarBitInit *get(TypedInit *T, unsigned B);
- Init *convertInitializerTo(RecTy *Ty) const override {
- return Ty->convertValue(const_cast<VarBitInit *>(this));
- }
+ Init *convertInitializerTo(RecTy *Ty) const override;
Init *getBitVar() const override { return TI; }
unsigned getBitNum() const override { return Bit; }
@@ -1095,10 +936,6 @@ public:
static VarListElementInit *get(TypedInit *T, unsigned E);
- Init *convertInitializerTo(RecTy *Ty) const override {
- return Ty->convertValue(const_cast<VarListElementInit *>(this));
- }
TypedInit *getVariable() const { return TI; }
unsigned getElementNum() const { return Element; }
@@ -1131,9 +968,7 @@ public:
static DefInit *get(Record*);
- Init *convertInitializerTo(RecTy *Ty) const override {
- return Ty->convertValue(const_cast<DefInit *>(this));
- }
+ Init *convertInitializerTo(RecTy *Ty) const override;
Record *getDef() const { return Def; }
@@ -1179,10 +1014,6 @@ public:
static FieldInit *get(Init *R, const std::string &FN);
static FieldInit *get(Init *R, const Init *FN);
- Init *convertInitializerTo(RecTy *Ty) const override {
- return Ty->convertValue(const_cast<FieldInit *>(this));
- }
Init *getBit(unsigned Bit) const override;
Init *resolveListElementReference(Record &R, const RecordVal *RV,
@@ -1228,9 +1059,7 @@ public:
void Profile(FoldingSetNodeID &ID) const;
- Init *convertInitializerTo(RecTy *Ty) const override {
- return Ty->convertValue(const_cast<DagInit *>(this));
- }
+ Init *convertInitializerTo(RecTy *Ty) const override;
Init *getOperator() const { return Val; }
@@ -1280,22 +1109,21 @@ public:
class RecordVal {
- Init *Name;
+ PointerIntPair<Init *, 1, bool> NameAndPrefix;
RecTy *Ty;
- unsigned Prefix;
Init *Value;
- RecordVal(Init *N, RecTy *T, unsigned P);
- RecordVal(const std::string &N, RecTy *T, unsigned P);
+ RecordVal(Init *N, RecTy *T, bool P);
+ RecordVal(const std::string &N, RecTy *T, bool P);
const std::string &getName() const;
- const Init *getNameInit() const { return Name; }
+ const Init *getNameInit() const { return NameAndPrefix.getPointer(); }
std::string getNameInitAsString() const {
return getNameInit()->getAsUnquotedString();
- unsigned getPrefix() const { return Prefix; }
+ bool getPrefix() const { return NameAndPrefix.getInt(); }
RecTy *getType() const { return Ty; }
Init *getValue() const { return Value; }
@@ -1344,7 +1172,7 @@ class Record {
// def Def : Class<Struct<i>>;
// These need to get fully resolved before instantiating any other
- // definitions that usie them (e.g. Def). However, inside a multiclass they
+ // definitions that use them (e.g. Def). However, inside a multiclass they
// can't be immediately resolved so we mark them ResolveFirst to fully
// resolve them later as soon as the multiclass is instantiated.
bool ResolveFirst;
@@ -1354,13 +1182,6 @@ class Record {
// Constructs a record.
- explicit Record(const std::string &N, ArrayRef<SMLoc> locs,
- RecordKeeper &records, bool Anonymous = false) :
- ID(LastID++), Name(StringInit::get(N)), Locs(locs.begin(), locs.end()),
- TrackedRecords(records), TheInit(nullptr), IsAnonymous(Anonymous),
- ResolveFirst(false) {
- init();
- }
explicit Record(Init *N, ArrayRef<SMLoc> locs, RecordKeeper &records,
bool Anonymous = false) :
ID(LastID++), Name(N), Locs(locs.begin(), locs.end()),
@@ -1368,6 +1189,10 @@ public:
ResolveFirst(false) {
+ explicit Record(const std::string &N, ArrayRef<SMLoc> locs,
+ RecordKeeper &records, bool Anonymous = false)
+ : Record(StringInit::get(N), locs, records, Anonymous) {}
// When copy-constructing a Record, we must still guarantee a globally unique
// ID number. All other fields can be copied normally.
@@ -1406,8 +1231,8 @@ public:
ArrayRef<SMRange> getSuperClassRanges() const { return SuperClassRanges; }
bool isTemplateArg(Init *Name) const {
- for (unsigned i = 0, e = TemplateArgs.size(); i != e; ++i)
- if (TemplateArgs[i] == Name) return true;
+ for (Init *TA : TemplateArgs)
+ if (TA == Name) return true;
return false;
bool isTemplateArg(StringRef Name) const {
@@ -1415,16 +1240,16 @@ public:
const RecordVal *getValue(const Init *Name) const {
- for (unsigned i = 0, e = Values.size(); i != e; ++i)
- if (Values[i].getNameInit() == Name) return &Values[i];
+ for (const RecordVal &Val : Values)
+ if (Val.getNameInit() == Name) return &Val;
return nullptr;
const RecordVal *getValue(StringRef Name) const {
return getValue(StringInit::get(Name));
RecordVal *getValue(const Init *Name) {
- for (unsigned i = 0, e = Values.size(); i != e; ++i)
- if (Values[i].getNameInit() == Name) return &Values[i];
+ for (RecordVal &Val : Values)
+ if (Val.getNameInit() == Name) return &Val;
return nullptr;
RecordVal *getValue(StringRef Name) {
@@ -1465,15 +1290,15 @@ public:
bool isSubClassOf(const Record *R) const {
- for (unsigned i = 0, e = SuperClasses.size(); i != e; ++i)
- if (SuperClasses[i] == R)
+ for (const Record *SC : SuperClasses)
+ if (SC == R)
return true;
return false;
bool isSubClassOf(StringRef Name) const {
- for (unsigned i = 0, e = SuperClasses.size(); i != e; ++i)
- if (SuperClasses[i]->getNameInitAsString() == Name)
+ for (const Record *SC : SuperClasses)
+ if (SC->getNameInitAsString() == Name)
return true;
return false;
@@ -1523,7 +1348,7 @@ public:
/// Return true if the named field is unset.
bool isValueUnset(StringRef FieldName) const {
- return getValueInit(FieldName) == UnsetInit::get();
+ return isa<UnsetInit>(getValueInit(FieldName));
/// getValueAsString - This method looks up the specified field and returns
@@ -1675,7 +1500,6 @@ struct LessRecordFieldName {
struct LessRecordRegister {
- static size_t min(size_t a, size_t b) { return a < b ? a : b; }
static bool ascii_isdigit(char x) { return x >= '0' && x <= '9'; }
struct RecordParts {
diff --git a/include/llvm/TableGen/SetTheory.h b/include/llvm/TableGen/SetTheory.h
index 595c29f..d4e0f53 100644
--- a/include/llvm/TableGen/SetTheory.h
+++ b/include/llvm/TableGen/SetTheory.h
@@ -47,9 +47,10 @@
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/SMLoc.h"
#include <map>
#include <vector>
@@ -58,7 +59,6 @@ namespace llvm {
class DagInit;
class Init;
class Record;
-class RecordKeeper;
class SetTheory {
diff --git a/include/llvm/Target/ b/include/llvm/Target/
index ec8a12d..d99f0e1 100644
--- a/include/llvm/Target/
+++ b/include/llvm/Target/
@@ -381,6 +381,7 @@ class Instruction {
bit hasPostISelHook = 0; // To be *adjusted* after isel by target hook.
bit hasCtrlDep = 0; // Does this instruction r/w ctrl-flow chains?
bit isNotDuplicable = 0; // Is it unsafe to duplicate this instruction?
+ bit isConvergent = 0; // Is this instruction convergent?
bit isAsCheapAsAMove = 0; // As cheap (or cheaper) than a move instruction.
bit hasExtraSrcRegAllocReq = 0; // Sources have special regalloc requirement?
bit hasExtraDefRegAllocReq = 0; // Defs have special regalloc requirement?
@@ -506,7 +507,7 @@ class Requires<list<Predicate> preds> {
/// ops definition - This is just a simple marker used to identify the operand
/// list for an instruction. outs and ins are identical both syntactically and
-/// semanticallyr; they are used to define def operands and use operands to
+/// semantically; they are used to define def operands and use operands to
/// improve readability. This should be used like this:
/// (outs R32:$dst), (ins R32:$src1, R32:$src2) or something similar.
def ops;
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 5ec1565..902b99c 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -711,20 +711,22 @@ protected:
/// Target-dependent implementation for foldMemoryOperand.
/// Target-independent code in foldMemoryOperand will
/// take care of adding a MachineMemOperand to the newly created instruction.
- virtual MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- int FrameIndex) const {
+ /// The instruction and any auxiliary instructions necessary will be inserted
+ /// at InsertPt.
+ virtual MachineInstr *foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
return nullptr;
/// Target-dependent implementation for foldMemoryOperand.
/// Target-independent code in foldMemoryOperand will
/// take care of adding a MachineMemOperand to the newly created instruction.
- virtual MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- MachineInstr *LoadMI) const {
+ /// The instruction and any auxiliary instructions necessary will be inserted
+ /// at InsertPt.
+ virtual MachineInstr *foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const {
return nullptr;
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index e2fe9e8..616edd8 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1431,7 +1431,8 @@ public:
/// load/store.
virtual bool GetAddrModeArguments(IntrinsicInst * /*I*/,
SmallVectorImpl<Value*> &/*Ops*/,
- Type *&/*AccessTy*/) const {
+ Type *&/*AccessTy*/,
+ unsigned AddrSpace = 0) const {
return false;
@@ -1456,7 +1457,12 @@ public:
/// The type may be VoidTy, in which case only return true if the addressing
/// mode is legal for a load/store of any legal type. TODO: Handle
/// pre/postinc as well.
- virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+ ///
+ /// If the address space cannot be determined, it will be -1.
+ ///
+ /// TODO: Remove default argument
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ unsigned AddrSpace) const;
/// \brief Return the cost of the scaling factor used in the addressing mode
/// represented by AM for this target, for a load/store of the specified type.
@@ -1464,9 +1470,12 @@ public:
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns a negative value.
/// TODO: Handle pre/postinc as well.
- virtual int getScalingFactorCost(const AddrMode &AM, Type *Ty) const {
+ /// TODO: Remove default argument
+ virtual int getScalingFactorCost(const AddrMode &AM, Type *Ty,
+ unsigned AS = 0) const {
// Default: assume that any scaling factor used in a legal AM is free.
- if (isLegalAddressingMode(AM, Ty)) return 0;
+ if (isLegalAddressingMode(AM, Ty, AS))
+ return 0;
return -1;
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index 07a8f6d..8f8b78d 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -15,6 +15,7 @@
+#include "llvm/Target/TargetRecip.h"
#include "llvm/MC/MCTargetOptions.h"
#include <string>
@@ -72,7 +73,8 @@ namespace llvm {
CompressDebugSections(false), FunctionSections(false),
DataSections(false), UniqueSectionNames(true), TrapUnreachable(false),
TrapFuncName(), FloatABIType(FloatABI::Default),
- AllowFPOpFusion(FPOpFusion::Standard), JTType(JumpTable::Single),
+ AllowFPOpFusion(FPOpFusion::Standard), Reciprocals(TargetRecip()),
+ JTType(JumpTable::Single),
ThreadModel(ThreadModel::POSIX) {}
/// PrintMachineCode - This flag is enabled when the -print-machineinstrs
@@ -206,6 +208,9 @@ namespace llvm {
/// the value of this option.
FPOpFusion::FPOpFusionMode AllowFPOpFusion;
+ /// This class encapsulates options for reciprocal-estimate code generation.
+ TargetRecip Reciprocals;
/// JTType - This flag specifies the type of jump-instruction table to
/// create for functions that have the jumptable attribute.
JumpTable::JumpTableType JTType;
@@ -240,6 +245,7 @@ inline bool operator==(const TargetOptions &LHS,
ARE_EQUAL(TrapFuncName) &&
ARE_EQUAL(FloatABIType) &&
ARE_EQUAL(AllowFPOpFusion) &&
+ ARE_EQUAL(Reciprocals) &&
ARE_EQUAL(ThreadModel) &&
diff --git a/include/llvm/Target/TargetRecip.h b/include/llvm/Target/TargetRecip.h
new file mode 100644
index 0000000..4cc3672
--- /dev/null
+++ b/include/llvm/Target/TargetRecip.h
@@ -0,0 +1,73 @@
+//===--------------------- llvm/Target/TargetRecip.h ------------*- C++ -*-===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This class is used to customize machine-specific reciprocal estimate code
+// generation in a target-independent way.
+// If a target does not support operations in this specification, then code
+// generation will default to using supported operations.
+#include "llvm/ADT/StringRef.h"
+#include <vector>
+#include <string>
+#include <map>
+namespace llvm {
+struct TargetRecip {
+ TargetRecip();
+ /// Initialize all or part of the operations from command-line options or
+ /// a front end.
+ TargetRecip(const std::vector<std::string> &Args);
+ /// Set whether a particular reciprocal operation is enabled and how many
+ /// refinement steps are needed when using it. Use "all" to set enablement
+ /// and refinement steps for all operations.
+ void setDefaults(const StringRef &Key, bool Enable, unsigned RefSteps);
+ /// Return true if the reciprocal operation has been enabled by default or
+ /// from the command-line. Return false if the operation has been disabled
+ /// by default or from the command-line.
+ bool isEnabled(const StringRef &Key) const;
+ /// Return the number of iterations necessary to refine the
+ /// result of a machine instruction for the given reciprocal operation.
+ unsigned getRefinementSteps(const StringRef &Key) const;
+ bool operator==(const TargetRecip &Other) const;
+ enum {
+ Uninitialized = -1
+ };
+ struct RecipParams {
+ int8_t Enabled;
+ int8_t RefinementSteps;
+ RecipParams() : Enabled(Uninitialized), RefinementSteps(Uninitialized) {}
+ };
+ std::map<StringRef, RecipParams> RecipMap;
+ typedef std::map<StringRef, RecipParams>::iterator RecipIter;
+ typedef std::map<StringRef, RecipParams>::const_iterator ConstRecipIter;
+ bool parseGlobalParams(const std::string &Arg);
+ void parseIndividualParams(const std::vector<std::string> &Args);
+} // End llvm namespace
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index c4669f1..4676c95 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -16,10 +16,12 @@
#include "llvm/ADT/StringRef.h"
+#include <functional>
namespace llvm {
class BasicBlockPass;
+class Function;
class FunctionPass;
class ModulePass;
class Pass;
@@ -152,7 +154,14 @@ Pass *createLoopInterchangePass();
Pass *createLoopStrengthReducePass();
-Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset);
+// GlobalMerge - This pass merges internal (by default) globals into structs
+// to enable reuse of a base pointer by indexed addressing modes.
+// It can also be configured to focus on size optimizations only.
+Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset,
+ bool OnlyOptimizeForSize = false);
@@ -238,7 +247,8 @@ FunctionPass *createJumpThreadingPass(int Threshold = -1);
// CFGSimplification - Merge basic blocks, eliminate unreachable blocks,
// simplify terminator instructions, etc...
-FunctionPass *createCFGSimplificationPass(int Threshold = -1);
+FunctionPass *createCFGSimplificationPass(
+ int Threshold = -1, std::function<bool(const Function &)> Ftor = nullptr);
@@ -456,7 +466,7 @@ FunctionPass *createPlaceSafepointsPass();
// RewriteStatepointsForGC - Rewrite any gc.statepoints which do not yet have
// explicit relocations to include explicit relocations.
-FunctionPass *createRewriteStatepointsForGCPass();
+ModulePass *createRewriteStatepointsForGCPass();
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index f54e234..ce46d53 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -93,7 +93,7 @@ AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
// location this memory access defines. The best we can say
// is that if the call references what this instruction
// defines, it must be clobbered by this location.
- const AliasAnalysis::Location DefLoc = AA->getLocation(I);
+ const AliasAnalysis::Location DefLoc = MemoryLocation::get(I);
if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef)
return AliasAnalysis::ModRef;
@@ -267,78 +267,6 @@ AliasAnalysis::getModRefBehavior(const Function *F) {
// AliasAnalysis non-virtual helper method implementation
-AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) {
- AAMDNodes AATags;
- LI->getAAMetadata(AATags);
- return Location(LI->getPointerOperand(),
- getTypeStoreSize(LI->getType()), AATags);
-AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) {
- AAMDNodes AATags;
- SI->getAAMetadata(AATags);
- return Location(SI->getPointerOperand(),
- getTypeStoreSize(SI->getValueOperand()->getType()), AATags);
-AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) {
- AAMDNodes AATags;
- VI->getAAMetadata(AATags);
- return Location(VI->getPointerOperand(), UnknownSize, AATags);
-AliasAnalysis::getLocation(const AtomicCmpXchgInst *CXI) {
- AAMDNodes AATags;
- CXI->getAAMetadata(AATags);
- return Location(CXI->getPointerOperand(),
- getTypeStoreSize(CXI->getCompareOperand()->getType()),
- AATags);
-AliasAnalysis::getLocation(const AtomicRMWInst *RMWI) {
- AAMDNodes AATags;
- RMWI->getAAMetadata(AATags);
- return Location(RMWI->getPointerOperand(),
- getTypeStoreSize(RMWI->getValOperand()->getType()), AATags);
-AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) {
- uint64_t Size = UnknownSize;
- if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
- Size = C->getValue().getZExtValue();
- // memcpy/memmove can have AA tags. For memcpy, they apply
- // to both the source and the destination.
- AAMDNodes AATags;
- MTI->getAAMetadata(AATags);
- return Location(MTI->getRawSource(), Size, AATags);
-AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) {
- uint64_t Size = UnknownSize;
- if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
- Size = C->getValue().getZExtValue();
- // memcpy/memmove can have AA tags. For memcpy, they apply
- // to both the source and the destination.
- AAMDNodes AATags;
- MTI->getAAMetadata(AATags);
- return Location(MTI->getRawDest(), Size, AATags);
AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
// Be conservative in the face of volatile/atomic.
@@ -347,7 +275,7 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
// If the load address doesn't alias the given address, it doesn't read
// or write the specified memory.
- if (Loc.Ptr && !alias(getLocation(L), Loc))
+ if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc))
return NoModRef;
// Otherwise, a load just reads.
@@ -363,7 +291,7 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
if (Loc.Ptr) {
// If the store address cannot alias the pointer in question, then the
// specified memory cannot be modified by the store.
- if (!alias(getLocation(S), Loc))
+ if (!alias(MemoryLocation::get(S), Loc))
return NoModRef;
// If the pointer is a pointer to constant memory, then it could not have
@@ -383,7 +311,7 @@ AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
if (Loc.Ptr) {
// If the va_arg address cannot alias the pointer in question, then the
// specified memory cannot be accessed by the va_arg.
- if (!alias(getLocation(V), Loc))
+ if (!alias(MemoryLocation::get(V), Loc))
return NoModRef;
// If the pointer is a pointer to constant memory, then it could not have
@@ -403,7 +331,7 @@ AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) {
return ModRef;
// If the cmpxchg address does not alias the location, it does not access it.
- if (Loc.Ptr && !alias(getLocation(CX), Loc))
+ if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc))
return NoModRef;
return ModRef;
@@ -416,7 +344,7 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) {
return ModRef;
// If the atomicrmw address does not alias the location, it does not access it.
- if (Loc.Ptr && !alias(getLocation(RMW), Loc))
+ if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc))
return NoModRef;
return ModRef;
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 273eacc..dd6a3a0 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -219,8 +219,8 @@ bool AAEval::runOnFunction(Function &F) {
I1 != E; ++I1) {
for (SetVector<Value *>::iterator I2 = Stores.begin(), E2 = Stores.end();
I2 != E2; ++I2) {
- switch (AA.alias(AA.getLocation(cast<LoadInst>(*I1)),
- AA.getLocation(cast<StoreInst>(*I2)))) {
+ switch (AA.alias(MemoryLocation::get(cast<LoadInst>(*I1)),
+ MemoryLocation::get(cast<StoreInst>(*I2)))) {
case AliasAnalysis::NoAlias:
PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
@@ -245,8 +245,8 @@ bool AAEval::runOnFunction(Function &F) {
for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end();
I1 != E; ++I1) {
for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) {
- switch (AA.alias(AA.getLocation(cast<StoreInst>(*I1)),
- AA.getLocation(cast<StoreInst>(*I2)))) {
+ switch (AA.alias(MemoryLocation::get(cast<StoreInst>(*I1)),
+ MemoryLocation::get(cast<StoreInst>(*I2)))) {
case AliasAnalysis::NoAlias:
PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 50890c1..12c1c7d 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -130,7 +130,7 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
if (UnknownInsts.empty())
- UnknownInsts.push_back(I);
+ UnknownInsts.emplace_back(I);
if (!I->mayWriteToMemory()) {
AliasTy = MayAlias;
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 091943b..430b412 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -543,6 +543,10 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
return false;
+void BranchProbabilityInfo::releaseMemory() {
+ Weights.clear();
void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const {
OS << "---- Branch Probabilities ----\n";
// We print the probabilities from the last function the analysis ran over,
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 32fe9b9..b22ee7e 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -42,6 +42,7 @@ add_llvm_library(LLVMAnalysis
+ MemoryLocation.cpp
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index 808a38b..b16cdfe 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -779,23 +779,56 @@ void DependenceAnalysis::collectCommonLoops(const SCEV *Expression,
-void DependenceAnalysis::unifySubscriptType(Subscript *Pair) {
- const SCEV *Src = Pair->Src;
- const SCEV *Dst = Pair->Dst;
- IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
- IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
- if (SrcTy == nullptr || DstTy == nullptr) {
- assert(SrcTy == DstTy && "This function only unify integer types and "
- "expect Src and Dst share the same type "
- "otherwise.");
- return;
+void DependenceAnalysis::unifySubscriptType(ArrayRef<Subscript *> Pairs) {
+ unsigned widestWidthSeen = 0;
+ Type *widestType;
+ // Go through each pair and find the widest bit width to which we need
+ // to extend all of them.
+ for (unsigned i = 0; i < Pairs.size(); i++) {
+ const SCEV *Src = Pairs[i]->Src;
+ const SCEV *Dst = Pairs[i]->Dst;
+ IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
+ IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
+ if (SrcTy == nullptr || DstTy == nullptr) {
+ assert(SrcTy == DstTy && "This function only unify integer types and "
+ "expect Src and Dst share the same type "
+ "otherwise.");
+ continue;
+ }
+ if (SrcTy->getBitWidth() > widestWidthSeen) {
+ widestWidthSeen = SrcTy->getBitWidth();
+ widestType = SrcTy;
+ }
+ if (DstTy->getBitWidth() > widestWidthSeen) {
+ widestWidthSeen = DstTy->getBitWidth();
+ widestType = DstTy;
+ }
- if (SrcTy->getBitWidth() > DstTy->getBitWidth()) {
- // Sign-extend Dst to typeof(Src) if typeof(Src) is wider than typeof(Dst).
- Pair->Dst = SE->getSignExtendExpr(Dst, SrcTy);
- } else if (SrcTy->getBitWidth() < DstTy->getBitWidth()) {
- // Sign-extend Src to typeof(Dst) if typeof(Dst) is wider than typeof(Src).
- Pair->Src = SE->getSignExtendExpr(Src, DstTy);
+ assert(widestWidthSeen > 0);
+ // Now extend each pair to the widest seen.
+ for (unsigned i = 0; i < Pairs.size(); i++) {
+ const SCEV *Src = Pairs[i]->Src;
+ const SCEV *Dst = Pairs[i]->Dst;
+ IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
+ IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
+ if (SrcTy == nullptr || DstTy == nullptr) {
+ assert(SrcTy == DstTy && "This function only unify integer types and "
+ "expect Src and Dst share the same type "
+ "otherwise.");
+ continue;
+ }
+ if (SrcTy->getBitWidth() < widestWidthSeen)
+ // Sign-extend Src to widestType
+ Pairs[i]->Src = SE->getSignExtendExpr(Src, widestType);
+ if (DstTy->getBitWidth() < widestWidthSeen) {
+ // Sign-extend Dst to widestType
+ Pairs[i]->Dst = SE->getSignExtendExpr(Dst, widestType);
+ }
@@ -2937,7 +2970,7 @@ const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const {
// return the coefficient (the step)
// corresponding to the specified loop.
// If there isn't one, return 0.
-// For example, given a*i + b*j + c*k, zeroing the coefficient
+// For example, given a*i + b*j + c*k, finding the coefficient
// corresponding to the j loop would yield b.
const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr,
const Loop *TargetLoop) const {
@@ -3574,13 +3607,16 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
SmallBitVector Sivs(Pairs);
SmallBitVector Mivs(Pairs);
SmallBitVector ConstrainedLevels(MaxLevels + 1);
+ SmallVector<Subscript *, 4> PairsInGroup;
for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
DEBUG(dbgs() << SJ << " ");
if (Pair[SJ].Classification == Subscript::SIV)
+ PairsInGroup.push_back(&Pair[SJ]);
+ unifySubscriptType(PairsInGroup);
DEBUG(dbgs() << "}\n");
while (Sivs.any()) {
bool Changed = false;
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 097b99e..ec56d88 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -3144,6 +3144,90 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
+/// replaced with RepOp.
+static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
+ const Query &Q,
+ unsigned MaxRecurse) {
+ // Trivial replacement.
+ if (V == Op)
+ return RepOp;
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return nullptr;
+ // If this is a binary operator, try to simplify it with the replaced op.
+ if (auto *B = dyn_cast<BinaryOperator>(I)) {
+ // Consider:
+ // %cmp = icmp eq i32 %x, 2147483647
+ // %add = add nsw i32 %x, 1
+ // %sel = select i1 %cmp, i32 -2147483648, i32 %add
+ //
+ // We can't replace %sel with %add unless we strip away the flags.
+ if (isa<OverflowingBinaryOperator>(B))
+ if (B->hasNoSignedWrap() || B->hasNoUnsignedWrap())
+ return nullptr;
+ if (isa<PossiblyExactOperator>(B))
+ if (B->isExact())
+ return nullptr;
+ if (MaxRecurse) {
+ if (B->getOperand(0) == Op)
+ return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), Q,
+ MaxRecurse - 1);
+ if (B->getOperand(1) == Op)
+ return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, Q,
+ MaxRecurse - 1);
+ }
+ }
+ // Same for CmpInsts.
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) {
+ if (MaxRecurse) {
+ if (C->getOperand(0) == Op)
+ return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), Q,
+ MaxRecurse - 1);
+ if (C->getOperand(1) == Op)
+ return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, Q,
+ MaxRecurse - 1);
+ }
+ }
+ // TODO: We could hand off more cases to instsimplify here.
+ // If all operands are constant after substituting Op for RepOp then we can
+ // constant fold the instruction.
+ if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) {
+ // Build a list of all constant operands.
+ SmallVector<Constant *, 8> ConstOps;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (I->getOperand(i) == Op)
+ ConstOps.push_back(CRepOp);
+ else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i)))
+ ConstOps.push_back(COp);
+ else
+ break;
+ }
+ // All operands were constants, fold it.
+ if (ConstOps.size() == I->getNumOperands()) {
+ if (CmpInst *C = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
+ ConstOps[1], Q.DL, Q.TLI);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ if (!LI->isVolatile())
+ return ConstantFoldLoadFromConstPtr(ConstOps[0], Q.DL);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps,
+ Q.DL, Q.TLI);
+ }
+ }
+ return nullptr;
/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
/// the result. If not, this returns null.
static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
@@ -3172,29 +3256,28 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
return TrueVal;
- const auto *ICI = dyn_cast<ICmpInst>(CondVal);
- unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits();
- if (ICI && BitWidth) {
+ if (const auto *ICI = dyn_cast<ICmpInst>(CondVal)) {
+ unsigned BitWidth = Q.DL.getTypeSizeInBits(TrueVal->getType());
ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
APInt MinSignedValue = APInt::getSignBit(BitWidth);
Value *X;
const APInt *Y;
bool TrueWhenUnset;
bool IsBitTest = false;
if (ICmpInst::isEquality(Pred) &&
- match(ICI->getOperand(0), m_And(m_Value(X), m_APInt(Y))) &&
- match(ICI->getOperand(1), m_Zero())) {
+ match(CmpLHS, m_And(m_Value(X), m_APInt(Y))) &&
+ match(CmpRHS, m_Zero())) {
IsBitTest = true;
TrueWhenUnset = Pred == ICmpInst::ICMP_EQ;
- } else if (Pred == ICmpInst::ICMP_SLT &&
- match(ICI->getOperand(1), m_Zero())) {
- X = ICI->getOperand(0);
+ } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) {
+ X = CmpLHS;
Y = &MinSignedValue;
IsBitTest = true;
TrueWhenUnset = false;
- } else if (Pred == ICmpInst::ICMP_SGT &&
- match(ICI->getOperand(1), m_AllOnes())) {
- X = ICI->getOperand(0);
+ } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) {
+ X = CmpLHS;
Y = &MinSignedValue;
IsBitTest = true;
TrueWhenUnset = true;
@@ -3225,6 +3308,50 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
return TrueWhenUnset ? TrueVal : FalseVal;
+ if (ICI->hasOneUse()) {
+ const APInt *C;
+ if (match(CmpRHS, m_APInt(C))) {
+ // X < MIN ? T : F --> F
+ if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue())
+ return FalseVal;
+ // X < MIN ? T : F --> F
+ if (Pred == ICmpInst::ICMP_ULT && C->isMinValue())
+ return FalseVal;
+ // X > MAX ? T : F --> F
+ if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue())
+ return FalseVal;
+ // X > MAX ? T : F --> F
+ if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue())
+ return FalseVal;
+ }
+ }
+ // If we have an equality comparison then we know the value in one of the
+ // arms of the select. See if substituting this value into the arm and
+ // simplifying the result yields the same value as the other arm.
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ TrueVal)
+ return FalseVal;
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ FalseVal)
+ return FalseVal;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ FalseVal)
+ return TrueVal;
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ TrueVal)
+ return TrueVal;
+ }
return nullptr;
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index b70de00..c661c7b 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -177,15 +177,21 @@ void LoopAccessInfo::RuntimePointerCheck::print(
-bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
+unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks(
const SmallVectorImpl<int> *PtrPartition) const {
unsigned NumPointers = Pointers.size();
+ unsigned CheckCount = 0;
for (unsigned I = 0; I < NumPointers; ++I)
for (unsigned J = I + 1; J < NumPointers; ++J)
if (needsChecking(I, J, PtrPartition))
- return true;
- return false;
+ CheckCount++;
+ return CheckCount;
+bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
+ const SmallVectorImpl<int> *PtrPartition) const {
+ return getNumberOfChecks(PtrPartition) != 0;
namespace {
@@ -220,10 +226,11 @@ public:
/// \brief Check whether we can check the pointers at runtime for
- /// non-intersection.
+ /// non-intersection. Returns true when we have 0 pointers
+ /// (a check on 0 pointers for non-intersection will always return true).
bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
- unsigned &NumComparisons, ScalarEvolution *SE,
- Loop *TheLoop, const ValueToValueMap &Strides,
+ bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop,
+ const ValueToValueMap &Strides,
bool ShouldCheckStride = false);
/// \brief Goes over all memory accesses, checks whether a RT check is needed
@@ -289,29 +296,23 @@ static bool hasComputableBounds(ScalarEvolution *SE,
return AR->isAffine();
-/// \brief Check the stride of the pointer and ensure that it does not wrap in
-/// the address space.
-static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
- const ValueToValueMap &StridesMap);
bool AccessAnalysis::canCheckPtrAtRT(
- LoopAccessInfo::RuntimePointerCheck &RtCheck, unsigned &NumComparisons,
+ LoopAccessInfo::RuntimePointerCheck &RtCheck, bool &NeedRTCheck,
ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap,
bool ShouldCheckStride) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
bool CanDoRT = true;
+ NeedRTCheck = false;
+ if (!IsRTCheckNeeded) return true;
bool IsDepCheckNeeded = isDependencyCheckNeeded();
- NumComparisons = 0;
// We assign a consecutive id to access from different alias sets.
// Accesses between different groups don't need to be checked.
unsigned ASId = 1;
for (auto &AS : AST) {
- unsigned NumReadPtrChecks = 0;
- unsigned NumWritePtrChecks = 0;
// We assign consecutive id to access from different dependence sets.
// Accesses within the same set don't need a runtime check.
unsigned RunningDepId = 1;
@@ -322,11 +323,6 @@ bool AccessAnalysis::canCheckPtrAtRT(
bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
MemAccessInfo Access(Ptr, IsWrite);
- if (IsWrite)
- ++NumWritePtrChecks;
- else
- ++NumReadPtrChecks;
if (hasComputableBounds(SE, StridesMap, Ptr) &&
// When we run after a failing dependency check we have to make sure
// we don't have wrapping pointers.
@@ -354,16 +350,15 @@ bool AccessAnalysis::canCheckPtrAtRT(
- if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
- NumComparisons += 0; // Only one dependence set.
- else {
- NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks +
- NumWritePtrChecks - 1));
- }
+ // We need a runtime check if there are any accesses that need checking.
+ // However, some accesses cannot be checked (for example because we
+ // can't determine their bounds). In these cases we would need a check
+ // but wouldn't be able to add it.
+ NeedRTCheck = !CanDoRT || RtCheck.needsAnyChecking(nullptr);
// If the pointers that we would use for the bounds comparison have different
// address spaces, assume the values aren't directly comparable, so we can't
// use them for the runtime check. We also have to assume they could
@@ -510,8 +505,8 @@ static bool isInBoundsGep(Value *Ptr) {
/// \brief Check whether the access through \p Ptr has a constant stride.
-static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
- const ValueToValueMap &StridesMap) {
+int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
+ const ValueToValueMap &StridesMap) {
const Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
@@ -678,6 +673,42 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
return false;
+/// \brief Check the dependence for two accesses with the same stride \p Stride.
+/// \p Distance is the positive distance and \p TypeByteSize is type size in
+/// bytes.
+/// \returns true if they are independent.
+static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride,
+ unsigned TypeByteSize) {
+ assert(Stride > 1 && "The stride must be greater than 1");
+ assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
+ assert(Distance > 0 && "The distance must be non-zero");
+ // Skip if the distance is not multiple of type byte size.
+ if (Distance % TypeByteSize)
+ return false;
+ unsigned ScaledDist = Distance / TypeByteSize;
+ // No dependence if the scaled distance is not multiple of the stride.
+ // E.g.
+ // for (i = 0; i < 1024 ; i += 4)
+ // A[i+2] = A[i] + 1;
+ //
+ // Two accesses in memory (scaled distance is 2, stride is 4):
+ // | A[0] | | | | A[4] | | | |
+ // | | | A[2] | | | | A[6] | |
+ //
+ // E.g.
+ // for (i = 0; i < 1024 ; i += 3)
+ // A[i+4] = A[i] + 1;
+ //
+ // Two accesses in memory (scaled distance is 4, stride is 3):
+ // | A[0] | | | A[3] | | | A[6] | | |
+ // | | | | | A[4] | | | A[7] | |
+ return ScaledDist % Stride;
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
const MemAccessInfo &B, unsigned BIdx,
@@ -778,34 +809,87 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
unsigned Distance = (unsigned) Val.getZExtValue();
+ unsigned Stride = std::abs(StrideAPtr);
+ if (Stride > 1 &&
+ areStridedAccessesIndependent(Distance, Stride, TypeByteSize))
+ return Dependence::NoDep;
// Bail out early if passed-in parameters make vectorization not feasible.
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
VectorizerParams::VectorizationFactor : 1);
unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
VectorizerParams::VectorizationInterleave : 1);
+ // The minimum number of iterations for a vectorized/unrolled version.
+ unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
+ // It's not vectorizable if the distance is smaller than the minimum distance
+ // needed for a vectorized/unrolled version. Vectorizing one iteration in
+ // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
+ // TypeByteSize (no need to add the last gap distance).
+ //
+ // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
+ // foo(int *A) {
+ // int *B = (int *)((char *)A + 14);
+ // for (i = 0 ; i < 1024 ; i += 2)
+ // B[i] = A[i] + 1;
+ // }
+ //
+ // Two accesses in memory (stride is 2):
+ // | A[0] | | A[2] | | A[4] | | A[6] | |
+ // | B[0] | | B[2] | | B[4] |
+ //
+ // Distance needed for vectorizing iterations except the last iteration:
+ // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
+ // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
+ //
+ // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
+ // 12, which is less than distance.
+ //
+ // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
+ // the minimum distance needed is 28, which is greater than distance. It is
+ // not safe to do vectorization.
+ unsigned MinDistanceNeeded =
+ TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
+ if (MinDistanceNeeded > Distance) {
+ DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance
+ << '\n');
+ return Dependence::Backward;
+ }
- // The distance must be bigger than the size needed for a vectorized version
- // of the operation and the size of the vectorized operation must not be
- // bigger than the currrent maximum size.
- if (Distance < 2*TypeByteSize ||
- 2*TypeByteSize > MaxSafeDepDistBytes ||
- Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
- DEBUG(dbgs() << "LAA: Failure because of Positive distance "
- << Val.getSExtValue() << '\n');
+ // Unsafe if the minimum distance needed is greater than max safe distance.
+ if (MinDistanceNeeded > MaxSafeDepDistBytes) {
+ DEBUG(dbgs() << "LAA: Failure because it needs at least "
+ << MinDistanceNeeded << " size in bytes");
return Dependence::Backward;
// Positive distance bigger than max vectorization factor.
- MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ?
- Distance : MaxSafeDepDistBytes;
+ // FIXME: Should use max factor instead of max distance in bytes, which could
+ // not handle different types.
+ // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
+ // void foo (int *A, char *B) {
+ // for (unsigned i = 0; i < 1024; i++) {
+ // A[i+2] = A[i] + 1;
+ // B[i+2] = B[i] + 1;
+ // }
+ // }
+ //
+ // This case is currently unsafe according to the max safe distance. If we
+ // analyze the two accesses on array B, the max safe dependence distance
+ // is 2. Then we analyze the accesses on array A, the minimum distance needed
+ // is 8, which is greater than 2 and forbids vectorization. But actually
+ // both A and B could be vectorized by 2 iterations.
+ MaxSafeDepDistBytes =
+ Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes;
bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
if (IsTrueDataDependence &&
couldPreventStoreLoadForward(Distance, TypeByteSize))
return Dependence::BackwardVectorizableButPreventsForwarding;
- DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() <<
- " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
+ DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
+ << " with max VF = "
+ << MaxSafeDepDistBytes / (TypeByteSize * Stride) << '\n');
return Dependence::BackwardVectorizable;
@@ -1066,7 +1150,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
if (Seen.insert(Ptr).second) {
- AliasAnalysis::Location Loc = AA->getLocation(ST);
+ AliasAnalysis::Location Loc = MemoryLocation::get(ST);
// The TBAA metadata could have a control dependency on the predication
// condition, so we cannot rely on it when determining whether or not we
// need runtime pointer checks.
@@ -1102,7 +1186,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
IsReadOnlyPtr = true;
- AliasAnalysis::Location Loc = AA->getLocation(LD);
+ AliasAnalysis::Location Loc = MemoryLocation::get(LD);
// The TBAA metadata could have a control dependency on the predication
// condition, so we cannot rely on it when determining whether or not we
// need runtime pointer checks.
@@ -1123,22 +1207,17 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
// Build dependence sets and check whether we need a runtime pointer bounds
// check.
- bool NeedRTCheck = Accesses.isRTCheckNeeded();
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
- bool CanDoRT = false;
- if (NeedRTCheck)
- CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
- Strides);
- DEBUG(dbgs() << "LAA: We need to do " << NumComparisons <<
- " pointer comparisons.\n");
+ bool NeedRTCheck;
+ bool CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck,
+ NeedRTCheck, SE,
+ TheLoop, Strides);
- // If we only have one set of dependences to check pointers among we don't
- // need a runtime check.
- if (NumComparisons == 0 && NeedRTCheck)
- NeedRTCheck = false;
+ DEBUG(dbgs() << "LAA: We need to do "
+ << PtrRtCheck.getNumberOfChecks(nullptr)
+ << " pointer comparisons.\n");
// Check that we found the bounds for the pointer.
if (CanDoRT)
@@ -1171,10 +1250,11 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
PtrRtCheck.Need = true;
- CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NeedRTCheck, SE,
TheLoop, Strides, true);
// Check that we found the bounds for the pointer.
- if (!CanDoRT && NumComparisons > 0) {
+ if (NeedRTCheck && !CanDoRT) {
<< "cannot check memory dependencies at runtime");
DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
@@ -1319,7 +1399,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI,
const ValueToValueMap &Strides)
- : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
+ : DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
MaxSafeDepDistBytes(-1U), CanVecMem(false),
StoreToLoopInvariantAddress(false) {
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 3c1826a..255bae6 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -124,11 +124,11 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
AliasAnalysis *AA) {
if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
if (LI->isUnordered()) {
- Loc = AA->getLocation(LI);
+ Loc = MemoryLocation::get(LI);
return AliasAnalysis::Ref;
if (LI->getOrdering() == Monotonic) {
- Loc = AA->getLocation(LI);
+ Loc = MemoryLocation::get(LI);
return AliasAnalysis::ModRef;
Loc = AliasAnalysis::Location();
@@ -137,11 +137,11 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->isUnordered()) {
- Loc = AA->getLocation(SI);
+ Loc = MemoryLocation::get(SI);
return AliasAnalysis::Mod;
if (SI->getOrdering() == Monotonic) {
- Loc = AA->getLocation(SI);
+ Loc = MemoryLocation::get(SI);
return AliasAnalysis::ModRef;
Loc = AliasAnalysis::Location();
@@ -149,7 +149,7 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
- Loc = AA->getLocation(V);
+ Loc = MemoryLocation::get(V);
return AliasAnalysis::ModRef;
@@ -486,7 +486,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
- AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
+ AliasAnalysis::Location LoadLoc = MemoryLocation::get(LI);
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
@@ -575,7 +575,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Ok, this store might clobber the query pointer. Check to see if it is
// a must alias: in this case, we want to return this as a def.
- AliasAnalysis::Location StoreLoc = AA->getLocation(SI);
+ AliasAnalysis::Location StoreLoc = MemoryLocation::get(SI);
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);
@@ -872,7 +872,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
void MemoryDependenceAnalysis::
getNonLocalPointerDependency(Instruction *QueryInst,
SmallVectorImpl<NonLocalDepResult> &Result) {
- const AliasAnalysis::Location Loc = AA->getLocation(QueryInst);
+ const AliasAnalysis::Location Loc = MemoryLocation::get(QueryInst);
bool isLoad = isa<LoadInst>(QueryInst);
BasicBlock *FromBB = QueryInst->getParent();
@@ -1278,8 +1278,7 @@ getNonLocalPointerDepFromBB(Instruction *QueryInst,
// Get the PHI translated pointer in this predecessor. This can fail if
// not translatable, in which case the getAddr() returns null.
PHITransAddr &PredPointer = PredList.back().second;
- PredPointer.PHITranslateValue(BB, Pred, nullptr);
+ PredPointer.PHITranslateValue(BB, Pred, DT, /*MustDominate=*/false);
Value *PredPtrVal = PredPointer.getAddr();
// Check to see if we have already visited this pred block with another
diff --git a/lib/Analysis/MemoryLocation.cpp b/lib/Analysis/MemoryLocation.cpp
new file mode 100644
index 0000000..f87a017
--- /dev/null
+++ b/lib/Analysis/MemoryLocation.cpp
@@ -0,0 +1,90 @@
+//===- MemoryLocation.cpp - Memory location descriptions -------------------==//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+using namespace llvm;
+MemoryLocation MemoryLocation::get(const LoadInst *LI) {
+ AAMDNodes AATags;
+ LI->getAAMetadata(AATags);
+ const auto &DL = LI->getModule()->getDataLayout();
+ return MemoryLocation(LI->getPointerOperand(),
+ DL.getTypeStoreSize(LI->getType()), AATags);
+MemoryLocation MemoryLocation::get(const StoreInst *SI) {
+ AAMDNodes AATags;
+ SI->getAAMetadata(AATags);
+ const auto &DL = SI->getModule()->getDataLayout();
+ return MemoryLocation(SI->getPointerOperand(),
+ DL.getTypeStoreSize(SI->getValueOperand()->getType()),
+ AATags);
+MemoryLocation MemoryLocation::get(const VAArgInst *VI) {
+ AAMDNodes AATags;
+ VI->getAAMetadata(AATags);
+ return MemoryLocation(VI->getPointerOperand(), UnknownSize, AATags);
+MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) {
+ AAMDNodes AATags;
+ CXI->getAAMetadata(AATags);
+ const auto &DL = CXI->getModule()->getDataLayout();
+ return MemoryLocation(
+ CXI->getPointerOperand(),
+ DL.getTypeStoreSize(CXI->getCompareOperand()->getType()), AATags);
+MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) {
+ AAMDNodes AATags;
+ RMWI->getAAMetadata(AATags);
+ const auto &DL = RMWI->getModule()->getDataLayout();
+ return MemoryLocation(RMWI->getPointerOperand(),
+ DL.getTypeStoreSize(RMWI->getValOperand()->getType()),
+ AATags);
+MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) {
+ uint64_t Size = UnknownSize;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Size = C->getValue().getZExtValue();
+ // memcpy/memmove can have AA tags. For memcpy, they apply
+ // to both the source and the destination.
+ AAMDNodes AATags;
+ MTI->getAAMetadata(AATags);
+ return MemoryLocation(MTI->getRawSource(), Size, AATags);
+MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MTI) {
+ uint64_t Size = UnknownSize;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Size = C->getValue().getZExtValue();
+ // memcpy/memmove can have AA tags. For memcpy, they apply
+ // to both the source and the destination.
+ AAMDNodes AATags;
+ MTI->getAAMetadata(AATags);
+ return MemoryLocation(MTI->getRawDest(), Size, AATags);
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 177684f..633d6aa 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -150,7 +150,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
if (!Inst) return V;
// Determine whether 'Inst' is an input to our PHI translatable expression.
- bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst);
+ bool isInput =
+ std::find(InstInputs.begin(), InstInputs.end(), Inst) != InstInputs.end();
// Handle inputs instructions if needed.
if (isInput) {
@@ -276,7 +277,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
isNSW = isNUW = false;
// If the old 'LHS' was an input, add the new 'LHS' as an input.
- if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) {
+ if (std::find(InstInputs.begin(), InstInputs.end(), BOp) !=
+ InstInputs.end()) {
RemoveInstInputs(BOp, InstInputs);
@@ -313,21 +315,26 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
/// PHITranslateValue - PHI translate the current address up the CFG from
-/// CurBB to Pred, updating our state to reflect any needed changes. If the
-/// dominator tree DT is non-null, the translated value must dominate
+/// CurBB to Pred, updating our state to reflect any needed changes. If
+/// 'MustDominate' is true, the translated value must dominate
/// PredBB. This returns true on failure and sets Addr to null.
bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
- const DominatorTree *DT) {
+ const DominatorTree *DT,
+ bool MustDominate) {
+ assert(DT || !MustDominate);
assert(Verify() && "Invalid PHITransAddr!");
- Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT);
+ if (DT && DT->isReachableFromEntry(PredBB))
+ Addr =
+ PHITranslateSubExpr(Addr, CurBB, PredBB, MustDominate ? DT : nullptr);
+ else
+ Addr = nullptr;
assert(Verify() && "Invalid PHITransAddr!");
- if (DT) {
+ if (MustDominate)
// Make sure the value is live in the predecessor.
if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr))
if (!DT->dominates(Inst->getParent(), PredBB))
Addr = nullptr;
- }
return Addr == nullptr;
@@ -370,7 +377,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
// See if we have a version of this value already available and dominating
// PredBB. If so, there is no need to insert a new instance of it.
PHITransAddr Tmp(InVal, DL, AC);
- if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT))
+ if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT, /*MustDominate=*/true))
return Tmp.getAddr();
// If we don't have an available version of this value, it must be an
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 0bd427b..f82235d 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1712,7 +1712,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
// would confuse the logic below that expects proper IVs.
if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) {
- DeadInsts.push_back(Phi);
+ DeadInsts.emplace_back(Phi);
DEBUG_WITH_TYPE(DebugType, dbgs()
<< "INDVARS: Eliminated constant iv: " << *Phi << '\n');
@@ -1787,7 +1787,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName);
- DeadInsts.push_back(IsomorphicInc);
+ DeadInsts.emplace_back(IsomorphicInc);
DEBUG_WITH_TYPE(DebugType, dbgs()
@@ -1800,13 +1800,30 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
- DeadInsts.push_back(Phi);
+ DeadInsts.emplace_back(Phi);
return NumElim;
bool SCEVExpander::isHighCostExpansionHelper(
const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) {
+ // Zero/One operand expressions
+ switch (S->getSCEVType()) {
+ case scUnknown:
+ case scConstant:
+ return false;
+ case scTruncate:
+ return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), L,
+ Processed);
+ case scZeroExtend:
+ return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(),
+ L, Processed);
+ case scSignExtend:
+ return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(),
+ L, Processed);
+ }
if (!Processed.insert(S).second)
return false;
@@ -1849,23 +1866,22 @@ bool SCEVExpander::isHighCostExpansionHelper(
- // Recurse past add expressions, which commonly occur in the
+ // HowManyLessThans uses a Max expression whenever the loop is not guarded by
+ // the exit condition.
+ if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
+ return true;
+ // Recurse past nary expressions, which commonly occur in the
// BackedgeTakenCount. They may already exist in program code, and if not,
// they are not too expensive rematerialize.
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
I != E; ++I) {
if (isHighCostExpansionHelper(*I, L, Processed))
return true;
- return false;
- // HowManyLessThans uses a Max expression whenever the loop is not guarded by
- // the exit condition.
- if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
- return true;
// If we haven't recognized an expensive SCEV pattern, assume it's an
// expression produced by program code.
return false;
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index e1744d1..24cada3 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -100,9 +100,10 @@ bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg,
- int64_t Scale) const {
+ int64_t Scale,
+ unsigned AddrSpace) const {
return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
- Scale);
+ Scale, AddrSpace);
bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
@@ -118,9 +119,10 @@ bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg,
- int64_t Scale) const {
+ int64_t Scale,
+ unsigned AddrSpace) const {
return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
- Scale);
+ Scale, AddrSpace);
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
@@ -235,6 +237,13 @@ TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+unsigned TargetTransformInfo::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ unsigned Alignment, unsigned AddressSpace) const {
+ return TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys) const {
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index a55712c..c4f0463 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -2967,38 +2967,25 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
// For GEPs, determine if the indexing lands within the allocated object.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ Type *VTy = GEP->getType();
+ Type *Ty = VTy->getPointerElementType();
+ const Value *Base = GEP->getPointerOperand();
// Conservatively require that the base pointer be fully dereferenceable.
- if (!Visited.insert(GEP->getOperand(0)).second)
+ if (!Visited.insert(Base).second)
return false;
- if (!isDereferenceablePointer(GEP->getOperand(0), DL, CtxI,
+ if (!isDereferenceablePointer(Base, DL, CtxI,
DT, TLI, Visited))
return false;
- // Check the indices.
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::const_op_iterator I = GEP->op_begin()+1,
- E = GEP->op_end(); I != E; ++I) {
- Value *Index = *I;
- Type *Ty = *GTI++;
- // Struct indices can't be out of bounds.
- if (isa<StructType>(Ty))
- continue;
- ConstantInt *CI = dyn_cast<ConstantInt>(Index);
- if (!CI)
- return false;
- // Zero is always ok.
- if (CI->isZero())
- continue;
- // Check to see that it's within the bounds of an array.
- ArrayType *ATy = dyn_cast<ArrayType>(Ty);
- if (!ATy)
- return false;
- if (CI->getValue().getActiveBits() > 64)
- return false;
- if (CI->getZExtValue() >= ATy->getNumElements())
- return false;
- }
- // Indices check out; this is dereferenceable.
- return true;
+ APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0);
+ if (!GEP->accumulateConstantOffset(DL, Offset))
+ return false;
+ // Check if the load is within the bounds of the underlying object.
+ uint64_t LoadSize = DL.getTypeStoreSize(Ty);
+ Type *BaseType = Base->getType()->getPointerElementType();
+ return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType));
// For gc.relocate, look through relocations
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 05c2428..09fe6c0 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -252,7 +252,7 @@ void LLLexer::SkipLineComment() {
-/// LexAt - Lex all tokens that start with an @ character:
+/// Lex all tokens that start with an @ character.
/// GlobalVar @\"[^\"]*\"
/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
/// GlobalVarID @[0-9]+
@@ -375,7 +375,7 @@ lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
return lltok::Error;
-/// LexPercent - Lex all tokens that start with a % character:
+/// Lex all tokens that start with a % character.
/// LocalVar ::= %\"[^\"]*\"
/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
/// LocalVarID ::= %[0-9]+
@@ -383,7 +383,7 @@ lltok::Kind LLLexer::LexPercent() {
return LexVar(lltok::LocalVar, lltok::LocalVarID);
-/// LexQuote - Lex all tokens that start with a " character:
+/// Lex all tokens that start with a " character.
/// QuoteLabel "[^"]+":
/// StringConstant "[^"]*"
lltok::Kind LLLexer::LexQuote() {
@@ -404,7 +404,7 @@ lltok::Kind LLLexer::LexQuote() {
return kind;
-/// LexExclaim:
+/// Lex all tokens that start with a ! character.
/// !foo
/// !
lltok::Kind LLLexer::LexExclaim() {
@@ -425,7 +425,7 @@ lltok::Kind LLLexer::LexExclaim() {
return lltok::exclaim;
-/// LexHash - Lex all tokens that start with a # character:
+/// Lex all tokens that start with a # character.
/// AttrGrpID ::= #[0-9]+
lltok::Kind LLLexer::LexHash() {
// Handle AttrGrpID: #[0-9]+
@@ -443,7 +443,7 @@ lltok::Kind LLLexer::LexHash() {
return lltok::Error;
-/// LexIdentifier: Handle several related productions:
+/// Lex a label, integer type, keyword, or hexadecimal integer constant.
/// Label [-a-zA-Z$._0-9]+:
/// IntegerType i[0-9]+
/// Keyword sdiv, float, ...
@@ -800,9 +800,8 @@ lltok::Kind LLLexer::LexIdentifier() {
return lltok::Error;
-/// Lex0x: Handle productions that start with 0x, knowing that it matches and
-/// that this is not a label:
+/// Lex all tokens that start with a 0x prefix, knowing they match and are not
+/// labels.
/// HexFPConstant 0x[0-9A-Fa-f]+
/// HexFP80Constant 0xK[0-9A-Fa-f]+
/// HexFP128Constant 0xL[0-9A-Fa-f]+
@@ -860,7 +859,7 @@ lltok::Kind LLLexer::Lex0x() {
-/// LexIdentifier: Handle several related productions:
+/// Lex tokens for a label or a numeric constant, possibly starting with -.
/// Label [-a-zA-Z$._0-9]+:
/// NInteger -[0-9]+
/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
@@ -938,6 +937,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
return lltok::APFloat;
+/// Lex a floating point constant starting with +.
/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
lltok::Kind LLLexer::LexPositive() {
// If the letter after the negative is a number, this is probably not a
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index a52e20f..681af2a 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -189,7 +189,7 @@ bool LLParser::ParseTopLevelEntities() {
// The Global variable production with no name can have many different
// optional leading prefixes, the production is:
// GlobalVar ::= OptionalLinkage OptionalVisibility OptionalDLLStorageClass
- // OptionalThreadLocal OptionalAddrSpace OptionalUnNammedAddr
+ // OptionalThreadLocal OptionalAddrSpace OptionalUnnamedAddr
// ('constant'|'global') ...
case lltok::kw_private: // OptionalLinkage
case lltok::kw_internal: // OptionalLinkage
@@ -615,12 +615,12 @@ static bool isValidVisibilityForLinkage(unsigned V, unsigned L) {
/// ParseAlias:
/// ::= GlobalVar '=' OptionalLinkage OptionalVisibility
/// OptionalDLLStorageClass OptionalThreadLocal
-/// OptionalUnNammedAddr 'alias' Aliasee
+/// OptionalUnnamedAddr 'alias' Aliasee
/// Aliasee
/// ::= TypeAndValue
-/// Everything through OptionalUnNammedAddr has already been parsed.
+/// Everything through OptionalUnnamedAddr has already been parsed.
bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L,
unsigned Visibility, unsigned DLLStorageClass,
@@ -705,13 +705,13 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L,
/// ParseGlobal
/// ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalDLLStorageClass
-/// OptionalThreadLocal OptionalUnNammedAddr OptionalAddrSpace
+/// OptionalThreadLocal OptionalUnnamedAddr OptionalAddrSpace
/// OptionalExternallyInitialized GlobalType Type Const
/// ::= OptionalLinkage OptionalVisibility OptionalDLLStorageClass
-/// OptionalThreadLocal OptionalUnNammedAddr OptionalAddrSpace
+/// OptionalThreadLocal OptionalUnnamedAddr OptionalAddrSpace
/// OptionalExternallyInitialized GlobalType Type Const
-/// Everything up to and including OptionalUnNammedAddr has been parsed
+/// Everything up to and including OptionalUnnamedAddr has been parsed
/// already.
bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
@@ -1902,9 +1902,9 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
return Error(TypeLoc, "invalid type for function argument");
unsigned AttrIndex = 1;
- ArgList.push_back(ArgInfo(TypeLoc, ArgTy,
- AttributeSet::get(ArgTy->getContext(),
- AttrIndex++, Attrs), Name));
+ ArgList.emplace_back(TypeLoc, ArgTy, AttributeSet::get(ArgTy->getContext(),
+ AttrIndex++, Attrs),
+ std::move(Name));
while (EatIfPresent(lltok::comma)) {
// Handle ... at end of arg list.
@@ -1930,10 +1930,10 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
if (!ArgTy->isFirstClassType())
return Error(TypeLoc, "invalid type for function argument");
- ArgList.push_back(ArgInfo(TypeLoc, ArgTy,
- AttributeSet::get(ArgTy->getContext(),
- AttrIndex++, Attrs),
- Name));
+ ArgList.emplace_back(
+ TypeLoc, ArgTy,
+ AttributeSet::get(ArgTy->getContext(), AttrIndex++, Attrs),
+ std::move(Name));
@@ -3730,7 +3730,7 @@ bool LLParser::ParseDILocalVariable(MDNode *&Result, bool IsDistinct) {
OPTIONAL(file, MDField, ); \
OPTIONAL(line, LineField, ); \
OPTIONAL(type, MDField, ); \
- OPTIONAL(arg, MDUnsignedField, (0, UINT8_MAX)); \
+ OPTIONAL(arg, MDUnsignedField, (0, UINT16_MAX)); \
OPTIONAL(flags, DIFlagField, );
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index f6d5ccc..056d87b 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -63,9 +63,7 @@ public:
// vector compatibility methods
unsigned size() const { return ValuePtrs.size(); }
void resize(unsigned N) { ValuePtrs.resize(N); }
- void push_back(Value *V) {
- ValuePtrs.push_back(V);
- }
+ void push_back(Value *V) { ValuePtrs.emplace_back(V); }
void clear() {
assert(ResolveConstants.empty() && "Constants not resolved?");
@@ -1499,6 +1497,8 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
if (Record.size() < 2)
return Error("Invalid record");
+ if (Record[0] == 0)
+ return Error("Invalid vector length");
ResultTy = getTypeByID(Record[1]);
if (!ResultTy || !StructType::isValidElementType(ResultTy))
return Error("Invalid type");
@@ -1636,9 +1636,9 @@ std::error_code BitcodeReader::ParseMetadata() {
Code = Stream.ReadCode();
- // METADATA_NAME is always followed by METADATA_NAMED_NODE.
unsigned NextBitCode = Stream.readRecord(Code, Record);
- assert(NextBitCode == bitc::METADATA_NAMED_NODE); (void)NextBitCode;
+ if (NextBitCode != bitc::METADATA_NAMED_NODE)
+ return Error("METADATA_NAME not followed by METADATA_NAMED_NODE");
// Read named metadata elements.
unsigned Size = Record.size();
@@ -2065,10 +2065,13 @@ std::error_code BitcodeReader::ResolveGlobalAndAliasInits() {
if (ValID >= ValueList.size()) {
} else {
- if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
- AliasInitWorklist.back().first->setAliasee(C);
- else
+ Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]);
+ if (!C)
return Error("Expected a constant");
+ GlobalAlias *Alias = AliasInitWorklist.back().first;
+ if (C->getType() != Alias->getType())
+ return Error("Alias and aliasee types don't match");
+ Alias->setAliasee(C);
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 58b87e1..5fe4c4b 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -163,7 +163,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// all callee-saved registers. In non-return this is any
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo *MFI = MF.getFrameInfo();
- BitVector Pristine = MFI->getPristineRegs(BB);
+ BitVector Pristine = MFI->getPristineRegs(MF);
for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 2487aba..8c68383 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -38,7 +38,7 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
Entries[I.second.Number] =
? Asm.getObjFileLowering().getDebugThreadLocalSymbol(I.first)
- : MCSymbolRefExpr::Create(I.first, Asm.OutContext);
+ : MCSymbolRefExpr::create(I.first, Asm.OutContext);
for (const MCExpr *Entry : Entries)
Asm.OutStreamer->EmitValue(Entry, Asm.getDataLayout().getPointerSize());
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 206be70..2e3b83a 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -14,7 +14,7 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
-#include "Win64Exception.h"
+#include "WinException.h"
#include "WinCodeViewLineTables.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
@@ -40,7 +40,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
@@ -268,8 +268,9 @@ bool AsmPrinter::doInitialization(Module &M) {
default: llvm_unreachable("unsupported unwinding information encoding");
case WinEH::EncodingType::Invalid:
+ case WinEH::EncodingType::X86:
case WinEH::EncodingType::Itanium:
- ES = new Win64Exception(this);
+ ES = new WinException(this);
@@ -511,7 +512,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (MAI->hasDotTypeDotSizeDirective())
// .size foo, 42
- OutStreamer->EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
+ OutStreamer->emitELFSize(cast<MCSymbolELF>(GVSym),
+ MCConstantExpr::create(Size, OutContext));
@@ -565,7 +567,7 @@ void AsmPrinter::EmitFunctionHeader() {
MCSymbol *CurPos = OutContext.createTempSymbol();
- MCSymbolRefExpr::Create(CurPos, OutContext));
+ MCSymbolRefExpr::create(CurPos, OutContext));
} else {
@@ -775,7 +777,7 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
// Emit a symbol assignment.
- MCConstantExpr::Create(FrameOffset, OutContext));
+ MCConstantExpr::create(FrameOffset, OutContext));
/// EmitFunctionBody - This method emits the body and trailer for a
@@ -899,11 +901,11 @@ void AsmPrinter::EmitFunctionBody() {
// We can get the size as difference between the function label and the
// temp label.
const MCExpr *SizeExp =
- MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(CurrentFnEnd, OutContext),
- MCSymbolRefExpr::Create(CurrentFnSymForSize,
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(CurrentFnEnd, OutContext),
+ MCSymbolRefExpr::create(CurrentFnSymForSize,
- OutStreamer->EmitELFSize(CurrentFnSym, SizeExp);
+ OutStreamer->emitELFSize(cast<MCSymbolELF>(CurrentFnSym), SizeExp);
for (const HandlerInfo &HI : Handlers) {
@@ -1325,9 +1327,9 @@ void AsmPrinter::EmitJumpTableInfo() {
// .set LJTSet, LBB32-base
const MCExpr *LHS =
- MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
OutStreamer->EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
- MCBinaryExpr::CreateSub(LHS, Base,
+ MCBinaryExpr::createSub(LHS, Base,
@@ -1367,14 +1369,14 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
case MachineJumpTableInfo::EK_BlockAddress:
// EK_BlockAddress - Each entry is a plain address of block, e.g.:
// .word LBB123
- Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
case MachineJumpTableInfo::EK_GPRel32BlockAddress: {
// EK_GPRel32BlockAddress - Each entry is an address of block, encoded
// with a relocation as gp-relative, e.g.:
// .gprel32 LBB123
MCSymbol *MBBSym = MBB->getSymbol();
- OutStreamer->EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ OutStreamer->EmitGPRel32Value(MCSymbolRefExpr::create(MBBSym, OutContext));
@@ -1383,7 +1385,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
// with a relocation as gp-relative, e.g.:
// .gpdword LBB123
MCSymbol *MBBSym = MBB->getSymbol();
- OutStreamer->EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ OutStreamer->EmitGPRel64Value(MCSymbolRefExpr::create(MBBSym, OutContext));
@@ -1396,14 +1398,14 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
// .set L4_5_set_123, LBB123 - LJTI1_2
// .word L4_5_set_123
if (MAI->doesSetDirectiveSuppressesReloc()) {
- Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()),
+ Value = MCSymbolRefExpr::create(GetJTSetSymbol(UID, MBB->getNumber()),
- Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext);
- Value = MCBinaryExpr::CreateSub(Value, Base, OutContext);
+ Value = MCBinaryExpr::createSub(Value, Base, OutContext);
@@ -1595,8 +1597,8 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
// Get the Hi-Lo expression.
const MCExpr *Diff =
- MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext),
- MCSymbolRefExpr::Create(Lo, OutContext),
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(Hi, OutContext),
+ MCSymbolRefExpr::create(Lo, OutContext),
if (!MAI->doesSetDirectiveSuppressesReloc()) {
@@ -1622,10 +1624,10 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
// Emit Label+Offset (or just Label if Offset is zero)
- const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext);
+ const MCExpr *Expr = MCSymbolRefExpr::create(Label, OutContext);
if (Offset)
- Expr = MCBinaryExpr::CreateAdd(
- Expr, MCConstantExpr::Create(Offset, OutContext), OutContext);
+ Expr = MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(Offset, OutContext), OutContext);
OutStreamer->EmitValue(Expr, Size);
@@ -1662,16 +1664,16 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
MCContext &Ctx = OutContext;
if (CV->isNullValue() || isa<UndefValue>(CV))
- return MCConstantExpr::Create(0, Ctx);
+ return MCConstantExpr::create(0, Ctx);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
- return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+ return MCConstantExpr::create(CI->getZExtValue(), Ctx);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
- return MCSymbolRefExpr::Create(getSymbol(GV), Ctx);
+ return MCSymbolRefExpr::create(getSymbol(GV), Ctx);
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
- return MCSymbolRefExpr::Create(GetBlockAddressSymbol(BA), Ctx);
+ return MCSymbolRefExpr::create(GetBlockAddressSymbol(BA), Ctx);
const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
if (!CE) {
@@ -1712,7 +1714,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
return Base;
int64_t Offset = OffsetAI.getSExtValue();
- return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+ return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
@@ -1755,8 +1757,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// the high bits so we are sure to get a proper truncation if the input is
// a constant expr.
unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
- const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
- return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+ const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
+ return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
// The MC library also has a right-shift operator, but it isn't consistently
@@ -1774,15 +1776,15 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
const MCExpr *RHS = lowerConstant(CE->getOperand(1));
switch (CE->getOpcode()) {
default: llvm_unreachable("Unknown binary operator constant cast expr");
- case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
- case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
- case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
- case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
- case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
- case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
- case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
- case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
- case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+ case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
+ case Instruction::Sub: return MCBinaryExpr::createSub(LHS, RHS, Ctx);
+ case Instruction::Mul: return MCBinaryExpr::createMul(LHS, RHS, Ctx);
+ case Instruction::SDiv: return MCBinaryExpr::createDiv(LHS, RHS, Ctx);
+ case Instruction::SRem: return MCBinaryExpr::createMod(LHS, RHS, Ctx);
+ case Instruction::Shl: return MCBinaryExpr::createShl(LHS, RHS, Ctx);
+ case Instruction::And: return MCBinaryExpr::createAnd(LHS, RHS, Ctx);
+ case Instruction::Or: return MCBinaryExpr::createOr (LHS, RHS, Ctx);
+ case Instruction::Xor: return MCBinaryExpr::createXor(LHS, RHS, Ctx);
@@ -2106,13 +2108,13 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
// cstexpr := <gotequiv> - "." + <cst>
// cstexpr := <gotequiv> - (<foo> - <offset from @foo base>) + <cst>
- // After canonicalization by EvaluateAsRelocatable `ME` turns into:
+ // After canonicalization by evaluateAsRelocatable `ME` turns into:
// cstexpr := <gotequiv> - <foo> + gotpcrelcst, where
// gotpcrelcst := <offset from @foo base> + <cst>
MCValue MV;
- if (!(*ME)->EvaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute())
+ if (!(*ME)->evaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute())
const MCSymbol *GOTEquivSym = &MV.getSymA()->getSymbol();
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 3258961..7dbfddf 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -254,40 +254,34 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
- // Get the abbreviation for this DIE.
- const DIEAbbrev &Abbrev = Die.getAbbrev();
// Emit the code (index) for the abbreviation.
if (isVerbose())
- OutStreamer->AddComment("Abbrev [" + Twine(Abbrev.getNumber()) +
- "] 0x" + Twine::utohexstr(Die.getOffset()) +
- ":0x" + Twine::utohexstr(Die.getSize()) + " " +
- dwarf::TagString(Abbrev.getTag()));
- EmitULEB128(Abbrev.getNumber());
- const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
- const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+ OutStreamer->AddComment("Abbrev [" + Twine(Die.getAbbrevNumber()) + "] 0x" +
+ Twine::utohexstr(Die.getOffset()) + ":0x" +
+ Twine::utohexstr(Die.getSize()) + " " +
+ dwarf::TagString(Die.getTag()));
+ EmitULEB128(Die.getAbbrevNumber());
// Emit the DIE attribute values.
- for (unsigned i = 0, N = Values.size(); i < N; ++i) {
- dwarf::Attribute Attr = AbbrevData[i].getAttribute();
- dwarf::Form Form = AbbrevData[i].getForm();
+ for (const auto &V : Die.values()) {
+ dwarf::Attribute Attr = V.getAttribute();
+ dwarf::Form Form = V.getForm();
assert(Form && "Too many attributes for DIE (check abbreviation)");
if (isVerbose()) {
if (Attr == dwarf::DW_AT_accessibility)
- OutStreamer->AddComment(dwarf::AccessibilityString(
- cast<DIEInteger>(Values[i])->getValue()));
+ OutStreamer->AddComment(
+ dwarf::AccessibilityString(V.getDIEInteger().getValue()));
// Emit an attribute using the defined form.
- Values[i]->EmitValue(this, Form);
+ V.EmitValue(this, Form);
// Emit the DIE children if any.
- if (Abbrev.hasChildren()) {
- for (auto &Child : Die.getChildren())
+ if (Die.hasChildren()) {
+ for (auto &Child : Die.children())
OutStreamer->AddComment("End Of Children Mark");
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index e7631dd..793e629 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -402,10 +402,11 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
unsigned OpFlags = MI->getOperand(OpNo).getImm();
++OpNo; // Skip over the ID number.
- if (Modifier[0] == 'l') // labels are target independent
+ if (Modifier[0] == 'l') { // Labels are target independent.
// FIXME: What if the operand isn't an MBB, report error?
- OS << *MI->getOperand(OpNo).getMBB()->getSymbol();
- else {
+ const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
+ Sym->print(OS, AP->MAI);
+ } else {
if (InlineAsm::isMemKind(OpFlags)) {
Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
Modifier[0] ? Modifier : nullptr,
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index 01d2c72..f2da855 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -18,7 +18,7 @@ add_llvm_library(LLVMAsmPrinter
- Win64Exception.cpp
+ WinException.cpp
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 1ccffe9..fa8449e 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -107,6 +107,13 @@ void DIEAbbrev::print(raw_ostream &O) {
void DIEAbbrev::dump() { print(dbgs()); }
+DIEAbbrev DIE::generateAbbrev() const {
+ DIEAbbrev Abbrev(Tag, hasChildren());
+ for (const DIEValue &V : Values)
+ Abbrev.AddAttribute(V.getAttribute(), V.getForm());
+ return Abbrev;
/// Climb up the parent chain to get the unit DIE to which this DIE
/// belongs.
const DIE *DIE::getUnit() const {
@@ -128,22 +135,19 @@ const DIE *DIE::getUnitOrNull() const {
return nullptr;
-DIEValue *DIE::findAttribute(dwarf::Attribute Attribute) const {
- const SmallVectorImpl<DIEValue *> &Values = getValues();
- const DIEAbbrev &Abbrevs = getAbbrev();
+DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const {
// Iterate through all the attributes until we find the one we're
// looking for, if we can't find it return NULL.
- for (size_t i = 0; i < Values.size(); ++i)
- if (Abbrevs.getData()[i].getAttribute() == Attribute)
- return Values[i];
- return nullptr;
+ for (const auto &V : values())
+ if (V.getAttribute() == Attribute)
+ return V;
+ return DIEValue();
#ifndef NDEBUG
void DIE::print(raw_ostream &O, unsigned IndentCount) const {
const std::string Indent(IndentCount, ' ');
- bool isBlock = Abbrev.getTag() == 0;
+ bool isBlock = getTag() == 0;
if (!isBlock) {
O << Indent
@@ -153,28 +157,26 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const {
<< ", Size: " << Size << "\n";
O << Indent
- << dwarf::TagString(Abbrev.getTag())
+ << dwarf::TagString(getTag())
<< " "
- << dwarf::ChildrenString(Abbrev.hasChildren()) << "\n";
+ << dwarf::ChildrenString(hasChildren()) << "\n";
} else {
O << "Size: " << Size << "\n";
- const SmallVectorImpl<DIEAbbrevData> &Data = Abbrev.getData();
IndentCount += 2;
- for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
O << Indent;
if (!isBlock)
- O << dwarf::AttributeString(Data[i].getAttribute());
+ O << dwarf::AttributeString(Values[i].getAttribute());
O << "Blk[" << i << "]";
O << " "
- << dwarf::FormEncodingString(Data[i].getForm())
+ << dwarf::FormEncodingString(Values[i].getForm())
<< " ";
- Values[i]->print(O);
+ Values[i].print(O);
O << "\n";
IndentCount -= 2;
@@ -193,40 +195,24 @@ void DIE::dump() {
void DIEValue::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Ty) {
-#define EMIT_VALUE_IMPL(Kind) \
- case is##Kind: \
- cast<DIE##Kind>(this)->EmitValueImpl(AP, Form); \
+ case isNone:
+ llvm_unreachable("Expected valid DIEValue");
+ case is##T: \
+ getDIE##T().EmitValue(AP, Form); \
- EMIT_VALUE_IMPL(TypeSignature)
+#include "llvm/CodeGen/DIEValue.def"
unsigned DIEValue::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Ty) {
-#define SIZE_OF_IMPL(Kind) \
- case is##Kind: \
- return cast<DIE##Kind>(this)->SizeOfImpl(AP, Form);
- SIZE_OF_IMPL(Integer)
- SIZE_OF_IMPL(String)
- SIZE_OF_IMPL(TypeSignature)
-#undef SIZE_OF_IMPL
+ case isNone:
+ llvm_unreachable("Expected valid DIEValue");
+ case is##T: \
+ return getDIE##T().SizeOf(AP, Form);
+#include "llvm/CodeGen/DIEValue.def"
llvm_unreachable("Unknown DIE kind");
@@ -234,21 +220,13 @@ unsigned DIEValue::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
#ifndef NDEBUG
void DIEValue::print(raw_ostream &O) const {
switch (Ty) {
-#define PRINT_IMPL(Kind) \
- case is##Kind: \
- cast<DIE##Kind>(this)->printImpl(O); \
+ case isNone:
+ llvm_unreachable("Expected valid DIEValue");
+ case is##T: \
+ getDIE##T().print(O); \
- PRINT_IMPL(Integer)
- PRINT_IMPL(String)
- PRINT_IMPL(TypeSignature)
-#undef PRINT_IMPL
+#include "llvm/CodeGen/DIEValue.def"
@@ -263,7 +241,7 @@ void DIEValue::dump() const {
/// EmitValue - Emit integer of appropriate size.
-void DIEInteger::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const {
+void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
unsigned Size = ~0U;
switch (Form) {
case dwarf::DW_FORM_flag_present:
@@ -299,7 +277,7 @@ void DIEInteger::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const {
/// SizeOf - Determine size of integer value in bytes.
-unsigned DIEInteger::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_flag_present: return 0;
case dwarf::DW_FORM_flag: // Fall thru
@@ -328,7 +306,7 @@ unsigned DIEInteger::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
#ifndef NDEBUG
-void DIEInteger::printImpl(raw_ostream &O) const {
+void DIEInteger::print(raw_ostream &O) const {
O << "Int: " << (int64_t)Integer << " 0x";
@@ -340,13 +318,13 @@ void DIEInteger::printImpl(raw_ostream &O) const {
/// EmitValue - Emit expression value.
-void DIEExpr::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
AP->OutStreamer->EmitValue(Expr, SizeOf(AP, Form));
/// SizeOf - Determine size of expression value in bytes.
-unsigned DIEExpr::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
@@ -354,7 +332,7 @@ unsigned DIEExpr::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
#ifndef NDEBUG
-void DIEExpr::printImpl(raw_ostream &O) const { O << "Expr: " << *Expr; }
+void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; }
@@ -363,7 +341,7 @@ void DIEExpr::printImpl(raw_ostream &O) const { O << "Expr: " << *Expr; }
/// EmitValue - Emit label value.
-void DIELabel::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelReference(Label, SizeOf(AP, Form),
Form == dwarf::DW_FORM_strp ||
Form == dwarf::DW_FORM_sec_offset ||
@@ -372,7 +350,7 @@ void DIELabel::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
/// SizeOf - Determine size of label value in bytes.
-unsigned DIELabel::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
@@ -380,9 +358,7 @@ unsigned DIELabel::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
#ifndef NDEBUG
-void DIELabel::printImpl(raw_ostream &O) const {
- O << "Lbl: " << Label->getName();
+void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); }
@@ -391,13 +367,13 @@ void DIELabel::printImpl(raw_ostream &O) const {
/// EmitValue - Emit delta value.
-void DIEDelta::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+void DIEDelta::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
/// SizeOf - Determine size of delta value in bytes.
-unsigned DIEDelta::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
@@ -405,7 +381,7 @@ unsigned DIEDelta::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
#ifndef NDEBUG
-void DIEDelta::printImpl(raw_ostream &O) const {
+void DIEDelta::print(raw_ostream &O) const {
O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
@@ -416,7 +392,7 @@ void DIEDelta::printImpl(raw_ostream &O) const {
/// EmitValue - Emit string value.
-void DIEString::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
(Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) &&
"Expected valid string form");
@@ -440,7 +416,7 @@ void DIEString::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
/// SizeOf - Determine size of delta value in bytes.
-unsigned DIEString::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
(Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) &&
"Expected valid string form");
@@ -458,7 +434,7 @@ unsigned DIEString::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
#ifndef NDEBUG
-void DIEString::printImpl(raw_ostream &O) const {
+void DIEString::print(raw_ostream &O) const {
O << "String: " << S.getString();
@@ -469,16 +445,16 @@ void DIEString::printImpl(raw_ostream &O) const {
/// EmitValue - Emit debug information entry offset.
-void DIEEntry::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_ref_addr) {
const DwarfDebug *DD = AP->getDwarfDebug();
- unsigned Addr = Entry.getOffset();
+ unsigned Addr = Entry->getOffset();
assert(!DD->useSplitDwarf() && "TODO: dwo files can't have relocations.");
// For DW_FORM_ref_addr, output the offset from beginning of debug info
// section. Entry->getOffset() returns the offset from start of the
// compile unit.
- DwarfCompileUnit *CU = DD->lookupUnit(Entry.getUnit());
+ DwarfCompileUnit *CU = DD->lookupUnit(Entry->getUnit());
assert(CU && "CUDie should belong to a CU.");
Addr += CU->getDebugInfoOffset();
if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
@@ -487,7 +463,7 @@ void DIEEntry::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
AP->OutStreamer->EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP));
} else
- AP->EmitInt32(Entry.getOffset());
+ AP->EmitInt32(Entry->getOffset());
unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) {
@@ -503,7 +479,7 @@ unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) {
#ifndef NDEBUG
-void DIEEntry::printImpl(raw_ostream &O) const {
+void DIEEntry::print(raw_ostream &O) const {
O << format("Die: 0x%lx", (long)(intptr_t)&Entry);
@@ -511,14 +487,15 @@ void DIEEntry::printImpl(raw_ostream &O) const {
// DIETypeSignature Implementation
-void DIETypeSignature::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const {
+void DIETypeSignature::EmitValue(const AsmPrinter *Asm,
+ dwarf::Form Form) const {
assert(Form == dwarf::DW_FORM_ref_sig8);
- Asm->OutStreamer->EmitIntValue(Unit.getTypeSignature(), 8);
+ Asm->OutStreamer->EmitIntValue(Unit->getTypeSignature(), 8);
#ifndef NDEBUG
-void DIETypeSignature::printImpl(raw_ostream &O) const {
- O << format("Type Unit: 0x%lx", Unit.getTypeSignature());
+void DIETypeSignature::print(raw_ostream &O) const {
+ O << format("Type Unit: 0x%lx", Unit->getTypeSignature());
@@ -530,9 +507,8 @@ void DIETypeSignature::printImpl(raw_ostream &O) const {
unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
if (!Size) {
- const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
- Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
+ Size += Values[i].SizeOf(AP, Values[i].getForm());
return Size;
@@ -540,7 +516,7 @@ unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
/// EmitValue - Emit location data.
-void DIELoc::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const {
+void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
default: llvm_unreachable("Improper form for block");
case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
@@ -551,14 +527,13 @@ void DIELoc::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const {
Asm->EmitULEB128(Size); break;
- const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
- Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
+ Values[i].EmitValue(Asm, Values[i].getForm());
/// SizeOf - Determine size of location data in bytes.
-unsigned DIELoc::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
@@ -571,7 +546,7 @@ unsigned DIELoc::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
#ifndef NDEBUG
-void DIELoc::printImpl(raw_ostream &O) const {
+void DIELoc::print(raw_ostream &O) const {
O << "ExprLoc: ";
DIE::print(O, 5);
@@ -585,9 +560,8 @@ void DIELoc::printImpl(raw_ostream &O) const {
unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
if (!Size) {
- const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
- Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
+ Size += Values[i].SizeOf(AP, Values[i].getForm());
return Size;
@@ -595,7 +569,7 @@ unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
/// EmitValue - Emit block data.
-void DIEBlock::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const {
+void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
default: llvm_unreachable("Improper form for block");
case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
@@ -604,14 +578,13 @@ void DIEBlock::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
- const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
- Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
+ Values[i].EmitValue(Asm, Values[i].getForm());
/// SizeOf - Determine size of block data in bytes.
-unsigned DIEBlock::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
@@ -622,7 +595,7 @@ unsigned DIEBlock::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
#ifndef NDEBUG
-void DIEBlock::printImpl(raw_ostream &O) const {
+void DIEBlock::print(raw_ostream &O) const {
O << "Blk: ";
DIE::print(O, 5);
@@ -632,7 +605,7 @@ void DIEBlock::printImpl(raw_ostream &O) const {
// DIELocList Implementation
-unsigned DIELocList::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4)
return 4;
if (Form == dwarf::DW_FORM_sec_offset)
@@ -642,7 +615,7 @@ unsigned DIELocList::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
/// EmitValue - Emit label value.
-void DIELocList::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
DwarfDebug *DD = AP->getDwarfDebug();
MCSymbol *Label = DD->getDebugLocs().getList(Index).Label;
@@ -653,8 +626,5 @@ void DIELocList::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
#ifndef NDEBUG
-void DIELocList::printImpl(raw_ostream &O) const {
- O << "LocList: " << Index;
+void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; }
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index a2e5aad..1445254 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -31,19 +31,12 @@ using namespace llvm;
/// \brief Grabs the string in whichever attribute is passed in and returns
/// a reference to it.
static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {
- const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
- const DIEAbbrev &Abbrevs = Die.getAbbrev();
// Iterate through all the attributes until we find the one we're
// looking for, if we can't find it return an empty string.
- for (size_t i = 0; i < Values.size(); ++i) {
- if (Abbrevs.getData()[i].getAttribute() == Attr) {
- DIEValue *V = Values[i];
- assert(isa<DIEString>(V) && "String requested. Not a string.");
- DIEString *S = cast<DIEString>(V);
- return S->getString();
- }
- }
+ for (const auto &V : Die.values())
+ if (V.getAttribute() == Attr)
+ return V.getDIEString().getString();
return StringRef("");
@@ -123,20 +116,16 @@ void DIEHash::addParentContext(const DIE &Parent) {
// Collect all of the attributes for a particular DIE in single structure.
void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) {
- const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
- const DIEAbbrev &Abbrevs = Die.getAbbrev();
case dwarf::NAME: \
- Attrs.NAME.Val = Values[i]; \
- Attrs.NAME.Desc = &Abbrevs.getData()[i]; \
+ Attrs.NAME = V; \
- for (size_t i = 0, e = Values.size(); i != e; ++i) {
+ for (const auto &V : Die.values()) {
DEBUG(dbgs() << "Attribute: "
- << dwarf::AttributeString(Abbrevs.getData()[i].getAttribute())
+ << dwarf::AttributeString(V.getAttribute())
<< " added.\n");
- switch (Abbrevs.getData()[i].getAttribute()) {
+ switch (V.getAttribute()) {
@@ -274,11 +263,9 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
// Hash all of the values in a block like set of values. This assumes that
// all of the data is going to be added as integers.
-void DIEHash::hashBlockData(const SmallVectorImpl<DIEValue *> &Values) {
- for (SmallVectorImpl<DIEValue *>::const_iterator I = Values.begin(),
- E = Values.end();
- I != E; ++I)
- Hash.update((uint64_t)cast<DIEInteger>(*I)->getValue());
+void DIEHash::hashBlockData(const DIE::value_range &Values) {
+ for (const auto &V : Values)
+ Hash.update((uint64_t)V.getDIEInteger().getValue());
// Hash the contents of a loclistptr class.
@@ -292,10 +279,8 @@ void DIEHash::hashLocList(const DIELocList &LocList) {
// Hash an individual attribute \param Attr based on the type of attribute and
// the form.
-void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) {
- const DIEValue *Value = Attr.Val;
- const DIEAbbrevData *Desc = Attr.Desc;
- dwarf::Attribute Attribute = Desc->getAttribute();
+void DIEHash::hashAttribute(DIEValue Value, dwarf::Tag Tag) {
+ dwarf::Attribute Attribute = Value.getAttribute();
// Other attribute values use the letter 'A' as the marker, and the value
// consists of the form code (encoded as an unsigned LEB128 value) followed by
@@ -304,17 +289,20 @@ void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) {
// computation is limited to the following: DW_FORM_sdata, DW_FORM_flag,
// DW_FORM_string, and DW_FORM_block.
- switch (Value->getType()) {
+ switch (Value.getType()) {
+ case DIEValue::isNone:
+ llvm_unreachable("Expected valid DIEValue");
// 7.27 Step 3
// ... An attribute that refers to another type entry T is processed as
// follows:
case DIEValue::isEntry:
- hashDIEEntry(Attribute, Tag, cast<DIEEntry>(Value)->getEntry());
+ hashDIEEntry(Attribute, Tag, Value.getDIEEntry().getEntry());
case DIEValue::isInteger: {
- switch (Desc->getForm()) {
+ switch (Value.getForm()) {
case dwarf::DW_FORM_data1:
case dwarf::DW_FORM_data2:
case dwarf::DW_FORM_data4:
@@ -322,14 +310,14 @@ void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) {
case dwarf::DW_FORM_udata:
case dwarf::DW_FORM_sdata:
- addSLEB128((int64_t)cast<DIEInteger>(Value)->getValue());
+ addSLEB128((int64_t)Value.getDIEInteger().getValue());
// DW_FORM_flag_present is just flag with a value of one. We still give it a
// value so just use the value.
case dwarf::DW_FORM_flag_present:
case dwarf::DW_FORM_flag:
- addULEB128((int64_t)cast<DIEInteger>(Value)->getValue());
+ addULEB128((int64_t)Value.getDIEInteger().getValue());
llvm_unreachable("Unknown integer form!");
@@ -340,7 +328,7 @@ void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) {
- addString(cast<DIEString>(Value)->getString());
+ addString(Value.getDIEString().getString());
case DIEValue::isBlock:
case DIEValue::isLoc:
@@ -348,17 +336,17 @@ void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) {
- if (isa<DIEBlock>(Value)) {
- addULEB128(cast<DIEBlock>(Value)->ComputeSize(AP));
- hashBlockData(cast<DIEBlock>(Value)->getValues());
- } else if (isa<DIELoc>(Value)) {
- addULEB128(cast<DIELoc>(Value)->ComputeSize(AP));
- hashBlockData(cast<DIELoc>(Value)->getValues());
+ if (Value.getType() == DIEValue::isBlock) {
+ addULEB128(Value.getDIEBlock().ComputeSize(AP));
+ hashBlockData(Value.getDIEBlock().values());
+ } else if (Value.getType() == DIEValue::isLoc) {
+ addULEB128(Value.getDIELoc().ComputeSize(AP));
+ hashBlockData(Value.getDIELoc().values());
} else {
// We could add the block length, but that would take
// a bit of work and not add a lot of uniqueness
// to the hash in some way we could test.
- hashLocList(*cast<DIELocList>(Value));
+ hashLocList(Value.getDIELocList());
// FIXME: It's uncertain whether or not we should handle this at the moment.
@@ -375,7 +363,7 @@ void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) {
void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) {
#define ADD_ATTR(ATTR) \
{ \
- if (ATTR.Val != 0) \
+ if (ATTR) \
hashAttribute(ATTR, Tag); \
@@ -463,7 +451,7 @@ void DIEHash::computeHash(const DIE &Die) {
// Then hash each of the children of the DIE.
- for (auto &C : Die.getChildren()) {
+ for (auto &C : Die.children()) {
// 7.27 Step 7
// If C is a nested type entry or a member function entry, ...
if (isType(C->getTag()) || C->getTag() == dwarf::DW_TAG_subprogram) {
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h
index ac014b7..1850e04 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -26,64 +26,57 @@ class CompileUnit;
/// \brief An object containing the capability of hashing and adding hash
/// attributes onto a DIE.
class DIEHash {
- // The entry for a particular attribute.
- struct AttrEntry {
- const DIEValue *Val;
- const DIEAbbrevData *Desc;
- };
// Collection of all attributes used in hashing a particular DIE.
struct DIEAttrs {
- AttrEntry DW_AT_name;
- AttrEntry DW_AT_accessibility;
- AttrEntry DW_AT_address_class;
- AttrEntry DW_AT_allocated;
- AttrEntry DW_AT_artificial;
- AttrEntry DW_AT_associated;
- AttrEntry DW_AT_binary_scale;
- AttrEntry DW_AT_bit_offset;
- AttrEntry DW_AT_bit_size;
- AttrEntry DW_AT_bit_stride;
- AttrEntry DW_AT_byte_size;
- AttrEntry DW_AT_byte_stride;
- AttrEntry DW_AT_const_expr;
- AttrEntry DW_AT_const_value;
- AttrEntry DW_AT_containing_type;
- AttrEntry DW_AT_count;
- AttrEntry DW_AT_data_bit_offset;
- AttrEntry DW_AT_data_location;
- AttrEntry DW_AT_data_member_location;
- AttrEntry DW_AT_decimal_scale;
- AttrEntry DW_AT_decimal_sign;
- AttrEntry DW_AT_default_value;
- AttrEntry DW_AT_digit_count;
- AttrEntry DW_AT_discr;
- AttrEntry DW_AT_discr_list;
- AttrEntry DW_AT_discr_value;
- AttrEntry DW_AT_encoding;
- AttrEntry DW_AT_enum_class;
- AttrEntry DW_AT_endianity;
- AttrEntry DW_AT_explicit;
- AttrEntry DW_AT_is_optional;
- AttrEntry DW_AT_location;
- AttrEntry DW_AT_lower_bound;
- AttrEntry DW_AT_mutable;
- AttrEntry DW_AT_ordering;
- AttrEntry DW_AT_picture_string;
- AttrEntry DW_AT_prototyped;
- AttrEntry DW_AT_small;
- AttrEntry DW_AT_segment;
- AttrEntry DW_AT_string_length;
- AttrEntry DW_AT_threads_scaled;
- AttrEntry DW_AT_upper_bound;
- AttrEntry DW_AT_use_location;
- AttrEntry DW_AT_use_UTF8;
- AttrEntry DW_AT_variable_parameter;
- AttrEntry DW_AT_virtuality;
- AttrEntry DW_AT_visibility;
- AttrEntry DW_AT_vtable_elem_location;
- AttrEntry DW_AT_type;
+ DIEValue DW_AT_name;
+ DIEValue DW_AT_accessibility;
+ DIEValue DW_AT_address_class;
+ DIEValue DW_AT_allocated;
+ DIEValue DW_AT_artificial;
+ DIEValue DW_AT_associated;
+ DIEValue DW_AT_binary_scale;
+ DIEValue DW_AT_bit_offset;
+ DIEValue DW_AT_bit_size;
+ DIEValue DW_AT_bit_stride;
+ DIEValue DW_AT_byte_size;
+ DIEValue DW_AT_byte_stride;
+ DIEValue DW_AT_const_expr;
+ DIEValue DW_AT_const_value;
+ DIEValue DW_AT_containing_type;
+ DIEValue DW_AT_count;
+ DIEValue DW_AT_data_bit_offset;
+ DIEValue DW_AT_data_location;
+ DIEValue DW_AT_data_member_location;
+ DIEValue DW_AT_decimal_scale;
+ DIEValue DW_AT_decimal_sign;
+ DIEValue DW_AT_default_value;
+ DIEValue DW_AT_digit_count;
+ DIEValue DW_AT_discr;
+ DIEValue DW_AT_discr_list;
+ DIEValue DW_AT_discr_value;
+ DIEValue DW_AT_encoding;
+ DIEValue DW_AT_enum_class;
+ DIEValue DW_AT_endianity;
+ DIEValue DW_AT_explicit;
+ DIEValue DW_AT_is_optional;
+ DIEValue DW_AT_location;
+ DIEValue DW_AT_lower_bound;
+ DIEValue DW_AT_mutable;
+ DIEValue DW_AT_ordering;
+ DIEValue DW_AT_picture_string;
+ DIEValue DW_AT_prototyped;
+ DIEValue DW_AT_small;
+ DIEValue DW_AT_segment;
+ DIEValue DW_AT_string_length;
+ DIEValue DW_AT_threads_scaled;
+ DIEValue DW_AT_upper_bound;
+ DIEValue DW_AT_use_location;
+ DIEValue DW_AT_use_UTF8;
+ DIEValue DW_AT_variable_parameter;
+ DIEValue DW_AT_virtuality;
+ DIEValue DW_AT_visibility;
+ DIEValue DW_AT_vtable_elem_location;
+ DIEValue DW_AT_type;
// Insert any additional ones here...
@@ -135,13 +128,13 @@ private:
/// \brief Hashes the data in a block like DIEValue, e.g. DW_FORM_block or
/// DW_FORM_exprloc.
- void hashBlockData(const SmallVectorImpl<DIEValue *> &Values);
+ void hashBlockData(const DIE::value_range &Values);
/// \brief Hashes the contents pointed to in the .debug_loc section.
void hashLocList(const DIELocList &LocList);
/// \brief Hashes an individual attribute.
- void hashAttribute(AttrEntry Attr, dwarf::Tag Tag);
+ void hashAttribute(DIEValue Value, dwarf::Tag Tag);
/// \brief Hashes an attribute that refers to another DIE.
void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index 58b406b..f8cdde2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -192,9 +192,9 @@ void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) {
PrevHash = HashValue;
Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i));
MCContext &Context = Asm->OutStreamer->getContext();
- const MCExpr *Sub = MCBinaryExpr::CreateSub(
- MCSymbolRefExpr::Create((*HI)->Sym, Context),
- MCSymbolRefExpr::Create(SecBegin, Context), Context);
+ const MCExpr *Sub = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create((*HI)->Sym, Context),
+ MCSymbolRefExpr::create(SecBegin, Context), Context);
Asm->OutStreamer->EmitValue(Sub, sizeof(uint32_t));
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index c10e703..689184a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -42,8 +42,7 @@ void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
DD->addArangeLabel(SymbolCU(this, Label));
unsigned idx = DD->getAddressPool().getIndex(Label);
- DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
- Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value);
+ Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, DIEInteger(idx));
void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
@@ -52,9 +51,10 @@ void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
if (Label)
DD->addArangeLabel(SymbolCU(this, Label));
- Die.addValue(Attribute, dwarf::DW_FORM_addr,
- Label ? (DIEValue *)new (DIEValueAllocator) DIELabel(Label)
- : new (DIEValueAllocator) DIEInteger(0));
+ if (Label)
+ Die.addValue(Attribute, dwarf::DW_FORM_addr, DIELabel(Label));
+ else
+ Die.addValue(Attribute, dwarf::DW_FORM_addr, DIEInteger(0));
unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName,
@@ -145,7 +145,7 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
bool addToAccelTable = false;
if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) {
addToAccelTable = true;
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
const MCSymbol *Sym = Asm->getSymbol(Global);
if (Global->isThreadLocal()) {
// FIXME: Make this work with -gsplit-dwarf.
@@ -183,7 +183,7 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
} else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) {
addToAccelTable = true;
// GV is a merged global.
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
Value *Ptr = CE->getOperand(0);
MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr));
DD->addArangeLabel(SymbolCU(this, Sym));
@@ -242,7 +242,7 @@ void DwarfCompileUnit::initStmtList() {
MCSymbol *LineTableStartSym =
- stmtListIndex = UnitDie.getValues().size();
+ stmtListIndex = std::distance(UnitDie.values_begin(), UnitDie.values_end());
// DW_AT_stmt_list is a offset of line number information for this
// compile unit in debug_line section. For split dwarf this is
@@ -255,9 +255,7 @@ void DwarfCompileUnit::initStmtList() {
void DwarfCompileUnit::applyStmtList(DIE &D) {
- D.addValue(dwarf::DW_AT_stmt_list,
- UnitDie.getAbbrev().getData()[stmtListIndex].getForm(),
- UnitDie.getValues()[stmtListIndex]);
+ D.addValue(UnitDie.values_begin()[stmtListIndex]);
void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
@@ -365,10 +363,9 @@ void DwarfCompileUnit::constructScopeDIE(
void DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Hi, const MCSymbol *Lo) {
- DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
Die.addValue(Attribute, DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
: dwarf::DW_FORM_data4,
- Value);
+ new (DIEValueAllocator) DIEDelta(Hi, Lo));
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
@@ -515,7 +512,7 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
return VariableDie;
auto Expr = DV.getExpression().begin();
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
for (auto FI : DV.getFrameIndex()) {
unsigned FrameReg = 0;
@@ -739,7 +736,7 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
/// Add an address attribute to a die based on the location provided.
void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
const MachineLocation &Location) {
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
bool validReg;
if (Location.isReg())
@@ -761,7 +758,7 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
dwarf::Attribute Attribute,
const MachineLocation &Location) {
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
assert(DV.getExpression().size() == 1);
const DIExpression *Expr = DV.getExpression().back();
@@ -782,10 +779,9 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
/// Add a Dwarf loclistptr attribute data and value.
void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
unsigned Index) {
- DIEValue *Value = new (DIEValueAllocator) DIELocList(Index);
dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
: dwarf::DW_FORM_data4;
- Die.addValue(Attribute, Form, Value);
+ Die.addValue(Attribute, Form, DIELocList(Index));
void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
@@ -802,8 +798,7 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
/// Add a Dwarf expression attribute data and value.
void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
const MCExpr *Expr) {
- DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr);
- Die.addValue((dwarf::Attribute)0, Form, Value);
+ Die.addValue((dwarf::Attribute)0, Form, DIEExpr(Expr));
void DwarfCompileUnit::applySubprogramAttributesToDefinition(
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 105ff6c..3f6665b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1340,9 +1340,8 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
// We could have a specification DIE that has our most of our knowledge,
// look for that now.
- DIEValue *SpecVal = Die->findAttribute(dwarf::DW_AT_specification);
- if (SpecVal) {
- DIE &SpecDIE = cast<DIEEntry>(SpecVal)->getEntry();
+ if (DIEValue SpecVal = Die->findAttribute(dwarf::DW_AT_specification)) {
+ DIE &SpecDIE = SpecVal.getDIEEntry().getEntry();
if (SpecDIE.findAttribute(dwarf::DW_AT_external))
Linkage = dwarf::GIEL_EXTERNAL;
} else if (Die->findAttribute(dwarf::DW_AT_external))
@@ -1563,6 +1562,8 @@ void DwarfDebug::emitDebugLoc() {
const DwarfCompileUnit *CU = List.CU;
for (const auto &Entry : DebugLocs.getEntries(List)) {
+ if (Entry.BeginSym == Entry.EndSym)
+ continue;
// Set up the range. This range is relative to the entry point of the
// compile unit. This is a hard coded 0 for low_pc when we're emitting
// ranges, or the DW_AT_low_pc on the compile unit otherwise.
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index a2799b8..d569827 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -65,6 +65,11 @@ void DwarfExpression::AddShr(unsigned ShiftBy) {
+void DwarfExpression::AddOpStackValue() {
+ if (DwarfVersion >= 4)
+ EmitOp(dwarf::DW_OP_stack_value);
bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) {
if (isFrameRegister(MachineReg)) {
// If variable offset is based in frame register then use fbreg.
@@ -172,16 +177,14 @@ void DwarfExpression::AddSignedConstant(int Value) {
// value, so the producers and consumers started to rely on heuristics
// to disambiguate the value vs. location status of the expression.
// See PR21176 for more details.
- if (DwarfVersion >= 4)
- EmitOp(dwarf::DW_OP_stack_value);
+ AddOpStackValue();
void DwarfExpression::AddUnsignedConstant(unsigned Value) {
// cf. comment in DwarfExpression::AddSignedConstant().
- if (DwarfVersion >= 4)
- EmitOp(dwarf::DW_OP_stack_value);
+ AddOpStackValue();
static unsigned getOffsetOrZero(unsigned OffsetInBits,
@@ -212,15 +215,30 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr,
getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
case dwarf::DW_OP_plus: {
- // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
auto N = I.getNext();
+ unsigned Offset = I->getArg(0);
+ // First combine all DW_OP_plus until we hit either a DW_OP_deref or a
+ // DW_OP_bit_piece
+ while (N != E && N->getOp() == dwarf::DW_OP_plus) {
+ Offset += N->getArg(0);
+ ++I;
+ N = I.getNext();
+ }
if (N != E && N->getOp() == dwarf::DW_OP_deref) {
- unsigned Offset = I->getArg(0);
+ // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
ValidReg = AddMachineRegIndirect(MachineReg, Offset);
std::advance(I, 2);
- break;
- } else
- ValidReg = AddMachineRegPiece(MachineReg);
+ } else {
+ assert ((N == E) || (N->getOp() == dwarf::DW_OP_bit_piece));
+ if (Offset == 0) {
+ ValidReg = AddMachineRegPiece(MachineReg);
+ } else {
+ ValidReg = AddMachineRegIndirect(MachineReg, Offset);
+ AddOpStackValue();
+ }
+ ++I;
+ }
+ break;
case dwarf::DW_OP_deref: {
// [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
@@ -237,6 +255,7 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr,
// Emit remaining elements of the expression.
AddExpression(I, E, PieceOffsetInBits);
return true;
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 78ec937..f6249ff 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -83,6 +83,9 @@ public:
bool AddMachineRegPiece(unsigned MachineReg, unsigned PieceSizeInBits = 0,
unsigned PieceOffsetInBits = 0);
+ /// Emit a DW_OP_stack_value
+ void AddOpStackValue();
/// Emit a signed constant.
void AddSignedConstant(int Value);
/// Emit an unsigned constant.
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 10b58d4..5ef333c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -20,25 +20,34 @@ namespace llvm {
DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA)
: Asm(AP), StrPool(DA, *Asm, Pref) {}
-DwarfFile::~DwarfFile() {}
+DwarfFile::~DwarfFile() {
+ for (DIEAbbrev *Abbrev : Abbreviations)
+ Abbrev->~DIEAbbrev();
// Define a unique number for the abbreviation.
-void DwarfFile::assignAbbrevNumber(DIEAbbrev &Abbrev) {
- // Check the set for priors.
- DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
- // If it's newly added.
- if (InSet == &Abbrev) {
- // Add to abbreviation list.
- Abbreviations.push_back(&Abbrev);
- // Assign the vector position + 1 as its number.
- Abbrev.setNumber(Abbreviations.size());
- } else {
- // Assign existing abbreviation number.
- Abbrev.setNumber(InSet->getNumber());
+DIEAbbrev &DwarfFile::assignAbbrevNumber(DIE &Die) {
+ FoldingSetNodeID ID;
+ DIEAbbrev Abbrev = Die.generateAbbrev();
+ Abbrev.Profile(ID);
+ void *InsertPos;
+ if (DIEAbbrev *Existing =
+ AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) {
+ Die.setAbbrevNumber(Existing->getNumber());
+ return *Existing;
+ // Move the abbreviation to the heap and assign a number.
+ DIEAbbrev *New = new (AbbrevAllocator) DIEAbbrev(std::move(Abbrev));
+ Abbreviations.push_back(New);
+ New->setNumber(Abbreviations.size());
+ Die.setAbbrevNumber(Abbreviations.size());
+ // Store it for lookup.
+ AbbreviationsSet.InsertNode(New, InsertPos);
+ return *New;
void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) {
@@ -83,10 +92,7 @@ void DwarfFile::computeSizeAndOffsets() {
// CU. It returns the offset after laying out the DIE.
unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
// Record the abbreviation.
- assignAbbrevNumber(Die.getAbbrev());
- // Get the abbreviation for this DIE.
- const DIEAbbrev &Abbrev = Die.getAbbrev();
+ const DIEAbbrev &Abbrev = assignAbbrevNumber(Die);
// Set DIE offset
@@ -94,22 +100,17 @@ unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
// Start the size with the size of abbreviation code.
Offset += getULEB128Size(Die.getAbbrevNumber());
- const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
- const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
// Size the DIE attribute values.
- for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ for (const auto &V : Die.values())
// Size attribute value.
- Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm());
- // Get the children.
- const auto &Children = Die.getChildren();
+ Offset += V.SizeOf(Asm, V.getForm());
// Size the DIE children if any.
- if (!Children.empty()) {
+ if (Die.hasChildren()) {
+ (void)Abbrev;
assert(Abbrev.hasChildren() && "Children flag not set");
- for (auto &Child : Children)
+ for (auto &Child : Die.children())
Offset = computeSizeAndOffset(*Child, Offset);
// End of children marker.
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 532ed96..8402027 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -37,6 +37,8 @@ class DwarfFile {
// Target of Dwarf emission, used for sizing of abbreviations.
AsmPrinter *Asm;
+ BumpPtrAllocator AbbrevAllocator;
// Used to uniquely define abbreviations.
FoldingSet<DIEAbbrev> AbbreviationsSet;
@@ -72,8 +74,11 @@ public:
/// \brief Compute the size and offset of all the DIEs.
void computeSizeAndOffsets();
- /// \brief Define a unique number for the abbreviation.
- void assignAbbrevNumber(DIEAbbrev &Abbrev);
+ /// Define a unique number for the abbreviation.
+ ///
+ /// Compute the abbreviation for \c Die, look up its unique number, and
+ /// return a reference to it in the uniquing table.
+ DIEAbbrev &assignAbbrevNumber(DIE &Die);
/// \brief Add a unit to the list of CUs.
void addUnit(std::unique_ptr<DwarfUnit> U);
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 04836c6..907f670 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -70,7 +70,6 @@ DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag,
DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) {
assert(UnitTag == dwarf::DW_TAG_compile_unit ||
UnitTag == dwarf::DW_TAG_type_unit);
- DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A,
@@ -89,11 +88,6 @@ DwarfUnit::~DwarfUnit() {
-DIEEntry *DwarfUnit::createDIEEntry(DIE &Entry) {
- DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
- return Value;
int64_t DwarfUnit::getDefaultLowerBound() const {
switch (getLanguage()) {
@@ -190,18 +184,16 @@ void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) {
void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
if (DD->getDwarfVersion() >= 4)
- Die.addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne);
+ Die.addValue(Attribute, dwarf::DW_FORM_flag_present, DIEInteger(1));
- Die.addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne);
+ Die.addValue(Attribute, dwarf::DW_FORM_flag, DIEInteger(1));
void DwarfUnit::addUInt(DIE &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, uint64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(false, Integer);
- DIEValue *Value = Integer == 1 ? DIEIntegerOne : new (DIEValueAllocator)
- DIEInteger(Integer);
- Die.addValue(Attribute, *Form, Value);
+ Die.addValue(Attribute, *Form, DIEInteger(Integer));
void DwarfUnit::addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer) {
@@ -212,8 +204,7 @@ void DwarfUnit::addSInt(DIE &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, int64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(true, Integer);
- DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
- Die.addValue(Attribute, *Form, Value);
+ Die.addValue(Attribute, *Form, DIEInteger(Integer));
void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
@@ -225,14 +216,12 @@ void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
StringRef String) {
isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp,
- new (DIEValueAllocator)
DIEString(DU->getStringPool().getEntry(*Asm, String)));
void DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
const MCSymbol *Label) {
- DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
- Die.addValue(Attribute, Form, Value);
+ Die.addValue(Attribute, Form, DIELabel(Label));
void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
@@ -265,12 +254,12 @@ void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Hi, const MCSymbol *Lo) {
- DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
- Die.addValue(Attribute, dwarf::DW_FORM_data4, Value);
+ Die.addValue(Attribute, dwarf::DW_FORM_data4,
+ new (DIEValueAllocator) DIEDelta(Hi, Lo));
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) {
- addDIEEntry(Die, Attribute, createDIEEntry(Entry));
+ addDIEEntry(Die, Attribute, DIEEntry(Entry));
void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) {
@@ -281,13 +270,13 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) {
addFlag(Die, dwarf::DW_AT_declaration);
Die.addValue(dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8,
- new (DIEValueAllocator) DIETypeSignature(Type));
+ DIETypeSignature(Type));
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
- DIEEntry *Entry) {
+ DIEEntry Entry) {
const DIE *DieCU = Die.getUnitOrNull();
- const DIE *EntryCU = Entry->getEntry().getUnitOrNull();
+ const DIE *EntryCU = Entry.getEntry().getUnitOrNull();
if (!DieCU)
// We assume that Die belongs to this CU, if it is not linked to any CU yet.
DieCU = &getUnitDie();
@@ -301,8 +290,7 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) {
assert(Tag != dwarf::DW_TAG_auto_variable &&
Tag != dwarf::DW_TAG_arg_variable);
- Parent.addChild(make_unique<DIE>((dwarf::Tag)Tag));
- DIE &Die = *Parent.getChildren().back();
+ DIE &Die = Parent.addChild(make_unique<DIE>((dwarf::Tag)Tag));
if (N)
insertDIE(N, &Die);
return Die;
@@ -471,7 +459,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// Decode the original location, and use that as the start of the byref
// variable's location.
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
bool validReg;
if (Location.isReg())
@@ -588,7 +576,7 @@ static uint64_t getBaseTypeSize(DwarfDebug *DD, const DIDerivedType *Ty) {
void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) {
assert(MO.isFPImm() && "Invalid machine operand!");
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock;
APFloat FPImm = MO.getFPImm()->getValueAPF();
// Get the raw data form of the floating point.
@@ -644,7 +632,7 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock;
// Get the raw data form of the large APInt.
const uint64_t *Ptr64 = Val.getRawData();
@@ -777,22 +765,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
void DwarfUnit::addType(DIE &Entity, const DIType *Ty,
dwarf::Attribute Attribute) {
assert(Ty && "Trying to add a type that doesn't exist?");
- // Check for pre-existence.
- DIEEntry *Entry = getDIEEntry(Ty);
- // If it exists then use the existing value.
- if (Entry) {
- addDIEEntry(Entity, Attribute, Entry);
- return;
- }
- // Construct type.
- DIE *Buffer = getOrCreateTypeDIE(Ty);
- // Set up proxy.
- Entry = createDIEEntry(*Buffer);
- insertDIEEntry(Ty, Entry);
- addDIEEntry(Entity, Attribute, Entry);
+ addDIEEntry(Entity, Attribute, DIEEntry(*getOrCreateTypeDIE(Ty)));
std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
@@ -969,12 +942,6 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (unsigned PropertyAttributes = Property->getAttributes())
addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None,
- DIEEntry *Entry = getDIEEntry(Element);
- if (!Entry) {
- Entry = createDIEEntry(ElemDie);
- insertDIEEntry(Element, Entry);
- }
@@ -1061,7 +1028,7 @@ void DwarfUnit::constructTemplateValueParameterDIE(
else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) {
// For declaration non-type template parameters (such as global values and
// functions)
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
addOpAddress(*Loc, Asm->getSymbol(GV));
// Emit DW_OP_stack_value to use the address as the immediate value of the
// parameter, rather than a pointer to it.
@@ -1354,7 +1321,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
// expression to extract appropriate offset from vtable.
// BaseAddr = ObAddr + *((*ObAddr) - Offset)
- DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc();
+ DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc;
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
@@ -1393,7 +1360,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
OffsetInBytes = DT->getOffsetInBits() >> 3;
if (DD->getDwarfVersion() <= 2) {
- DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc();
+ DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc;
addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes);
addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie);
@@ -1417,10 +1384,10 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
// Objective-C properties.
- if (MDNode *PNode = DT->getObjCProperty())
- if (DIEEntry *PropertyDie = getDIEEntry(PNode))
+ if (DINode *PNode = DT->getObjCProperty())
+ if (DIE *PDie = getDIE(PNode))
MemberDie.addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
- PropertyDie);
+ DIEEntry(*PDie));
if (DT->isArtificial())
addFlag(MemberDie, dwarf::DW_AT_artificial);
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 0d01a9e..f56c9b4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -93,10 +93,6 @@ protected:
/// information entries.
DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
- /// Tracks the mapping of unit level debug information descriptors to debug
- /// information entries using a DIEEntry proxy.
- DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
/// A list of all the DIEBlocks in use.
std::vector<DIEBlock *> DIEBlocks;
@@ -111,9 +107,6 @@ protected:
// All DIEValues are allocated through this allocator.
BumpPtrAllocator DIEValueAllocator;
- // A preallocated DIEValue because 1 is used frequently.
- DIEInteger *DIEIntegerOne;
/// The section this unit will be emitted in.
MCSection *Section;
@@ -150,7 +143,7 @@ public:
void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
/// \brief Return true if this compile unit has something to write out.
- bool hasContent() const { return !UnitDie.getChildren().empty(); }
+ bool hasContent() const { return UnitDie.hasChildren(); }
/// \brief Get string containing language specific context for a global name.
@@ -180,7 +173,7 @@ public:
DIE *getDIE(const DINode *D) const;
/// \brief Returns a fresh newly allocated DIELoc.
- DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc(); }
+ DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc; }
/// \brief Insert DIE into the map.
@@ -233,7 +226,7 @@ public:
void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry);
/// \brief Add a DIE attribute data and value.
- void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry *Entry);
+ void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry);
void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type);
@@ -369,26 +362,12 @@ private:
/// If the DWARF version doesn't handle the language, return -1.
int64_t getDefaultLowerBound() const;
- /// \brief Returns the DIE entry for the specified debug variable.
- DIEEntry *getDIEEntry(const MDNode *N) const {
- return MDNodeToDIEEntryMap.lookup(N);
- }
- /// \brief Insert debug information entry into the map.
- void insertDIEEntry(const MDNode *N, DIEEntry *E) {
- MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
- }
/// \brief Get an anonymous type for index type.
DIE *getIndexTyDie();
/// \brief Set D as anonymous type for index which can be reused later.
void setIndexTyDie(DIE *D) { IndexTyDie = D; }
- /// \brief Creates a new DIEEntry to be a proxy for a debug information
- /// entry.
- DIEEntry *createDIEEntry(DIE &Entry);
/// If this is a named finished type then include it in the list of types for
/// the accelerator tables.
void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index 371e20a..535b1f6 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -171,10 +171,10 @@ static void EmitLabelDiff(MCStreamer &Streamer,
unsigned int Size = 4) {
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
MCContext &Context = Streamer.getContext();
- const MCExpr *FromRef = MCSymbolRefExpr::Create(From, Variant, Context),
- *ToRef = MCSymbolRefExpr::Create(To, Variant, Context);
+ const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context),
+ *ToRef = MCSymbolRefExpr::create(To, Variant, Context);
const MCExpr *AddrDelta =
- MCBinaryExpr::Create(MCBinaryExpr::Sub, ToRef, FromRef, Context);
+ MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context);
Streamer.EmitValue(AddrDelta, Size);
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index dc6df9c..f166350 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -1,4 +1,4 @@
-//===-- CodeGen/AsmPrinter/Win64Exception.cpp - Dwarf Exception Impl ------===//
+//===-- CodeGen/AsmPrinter/WinException.cpp - Dwarf Exception Impl ------===//
// The LLVM Compiler Infrastructure
@@ -11,7 +11,7 @@
-#include "Win64Exception.h"
+#include "WinException.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
@@ -29,6 +29,7 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCWin64EH.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
@@ -38,28 +39,33 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
-Win64Exception::Win64Exception(AsmPrinter *A)
- : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false),
- shouldEmitMoves(false) {}
+WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
+ // MSVC's EH tables are always composed of 32-bit words. All known 64-bit
+ // platforms use an imagerel32 relocation to refer to symbols.
+ useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64);
-Win64Exception::~Win64Exception() {}
+WinException::~WinException() {}
/// endModule - Emit all exception information that should come after the
/// content.
-void Win64Exception::endModule() {
+void WinException::endModule() {
-void Win64Exception::beginFunction(const MachineFunction *MF) {
+void WinException::beginFunction(const MachineFunction *MF) {
shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
// If any landing pads survive, we need an EH table.
bool hasLandingPads = !MMI->getLandingPads().empty();
+ const Function *F = MF->getFunction();
+ const Function *ParentF = MMI->getWinEHParent(F);
shouldEmitMoves = Asm->needsSEHMoves();
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
- const Function *Per = MF->getMMI().getPersonality();
+ const Function *Per = MMI->getPersonality();
shouldEmitPersonality = hasLandingPads &&
PerEncoding != dwarf::DW_EH_PE_omit && Per;
@@ -68,12 +74,17 @@ void Win64Exception::beginFunction(const MachineFunction *MF) {
shouldEmitLSDA = shouldEmitPersonality &&
LSDAEncoding != dwarf::DW_EH_PE_omit;
+ // If we're not using CFI, we don't want the CFI or the personality. Emit the
+ // LSDA if this is the parent function.
+ if (!Asm->MAI->usesWindowsCFI()) {
+ shouldEmitLSDA = (hasLandingPads && F == ParentF);
+ shouldEmitPersonality = false;
+ return;
+ }
// If this was an outlined handler, we need to define the label corresponding
// to the offset of the parent frame relative to the stack pointer after the
// prologue.
- const Function *F = MF->getFunction();
- const Function *ParentF = MMI->getWinEHParent(F);
if (F != ParentF) {
WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF);
auto I = FuncInfo.CatchHandlerParentFrameObjOffset.find(F);
@@ -85,27 +96,24 @@ void Win64Exception::beginFunction(const MachineFunction *MF) {
// Emit a symbol assignment.
- MCConstantExpr::Create(I->second, Asm->OutContext));
+ MCConstantExpr::create(I->second, Asm->OutContext));
- if (!shouldEmitPersonality && !shouldEmitMoves)
- return;
+ if (shouldEmitMoves || shouldEmitPersonality)
+ Asm->OutStreamer->EmitWinCFIStartProc(Asm->CurrentFnSym);
- Asm->OutStreamer->EmitWinCFIStartProc(Asm->CurrentFnSym);
- if (!shouldEmitPersonality)
- return;
- const MCSymbol *PersHandlerSym =
- TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
- Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true);
+ if (shouldEmitPersonality) {
+ const MCSymbol *PersHandlerSym =
+ TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
+ Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true);
+ }
/// endFunction - Gather and emit post-function exception information.
-void Win64Exception::endFunction(const MachineFunction *MF) {
- if (!shouldEmitPersonality && !shouldEmitMoves)
+void WinException::endFunction(const MachineFunction *MF) {
+ if (!shouldEmitPersonality && !shouldEmitMoves && !shouldEmitLSDA)
EHPersonality Per = MMI->getPersonalityType();
@@ -116,16 +124,27 @@ void Win64Exception::endFunction(const MachineFunction *MF) {
if (!isMSVCEHPersonality(Per))
- if (shouldEmitPersonality) {
+ if (shouldEmitPersonality || shouldEmitLSDA) {
- // Emit an UNWIND_INFO struct describing the prologue.
- Asm->OutStreamer->EmitWinEHHandlerData();
+ if (shouldEmitMoves || shouldEmitPersonality) {
+ // Emit an UNWIND_INFO struct describing the prologue.
+ Asm->OutStreamer->EmitWinEHHandlerData();
+ } else {
+ // Just switch sections to the right xdata section. This use of
+ // CurrentFnSym assumes that we only emit the LSDA when ending the parent
+ // function.
+ MCSection *XData = WinEH::UnwindEmitter::getXDataSection(
+ Asm->CurrentFnSym, Asm->OutContext);
+ Asm->OutStreamer->SwitchSection(XData);
+ }
// Emit the tables appropriate to the personality function in use. If we
// don't recognize the personality, assume it uses an Itanium-style LSDA.
if (Per == EHPersonality::MSVC_Win64SEH)
+ else if (Per == EHPersonality::MSVC_X86SEH)
+ emitCSpecificHandlerTable(); // FIXME
else if (Per == EHPersonality::MSVC_CXX)
@@ -133,20 +152,24 @@ void Win64Exception::endFunction(const MachineFunction *MF) {
- Asm->OutStreamer->EmitWinCFIEndProc();
+ if (shouldEmitMoves)
+ Asm->OutStreamer->EmitWinCFIEndProc();
-const MCExpr *Win64Exception::createImageRel32(const MCSymbol *Value) {
+const MCExpr *WinException::create32bitRef(const MCSymbol *Value) {
if (!Value)
- return MCConstantExpr::Create(0, Asm->OutContext);
- return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32,
+ return MCConstantExpr::create(0, Asm->OutContext);
+ return MCSymbolRefExpr::create(Value, useImageRel32
+ ? MCSymbolRefExpr::VK_COFF_IMGREL32
+ : MCSymbolRefExpr::VK_None,
-const MCExpr *Win64Exception::createImageRel32(const GlobalValue *GV) {
+const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
if (!GV)
- return MCConstantExpr::Create(0, Asm->OutContext);
- return createImageRel32(Asm->getSymbol(GV));
+ return MCConstantExpr::create(0, Asm->OutContext);
+ return create32bitRef(Asm->getSymbol(GV));
/// Emit the language-specific data that __C_specific_handler expects. This
@@ -177,7 +200,7 @@ const MCExpr *Win64Exception::createImageRel32(const GlobalValue *GV) {
/// imagerel32 LabelLPad; // Zero means __finally.
/// } Entries[NumEntries];
/// };
-void Win64Exception::emitCSpecificHandlerTable() {
+void WinException::emitCSpecificHandlerTable() {
const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
// Simplifying assumptions for first implementation:
@@ -227,16 +250,16 @@ void Win64Exception::emitCSpecificHandlerTable() {
// Compute the label range. We may reuse the function begin and end labels
// rather than forming new ones.
const MCExpr *Begin =
- createImageRel32(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym);
+ create32bitRef(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym);
const MCExpr *End;
if (CSE.EndLabel) {
// The interval is half-open, so we have to add one to include the return
// address of the last invoke in the range.
- End = MCBinaryExpr::CreateAdd(createImageRel32(CSE.EndLabel),
- MCConstantExpr::Create(1, Asm->OutContext),
+ End = MCBinaryExpr::createAdd(create32bitRef(CSE.EndLabel),
+ MCConstantExpr::create(1, Asm->OutContext),
} else {
- End = createImageRel32(EHFuncEndSym);
+ End = create32bitRef(EHFuncEndSym);
// Emit an entry for each action.
@@ -248,7 +271,7 @@ void Win64Exception::emitCSpecificHandlerTable() {
// emit '1' to indicate a catch-all.
const Function *F = Handler.FilterOrFinally;
if (F)
- Asm->OutStreamer->EmitValue(createImageRel32(Asm->getSymbol(F)), 4);
+ Asm->OutStreamer->EmitValue(create32bitRef(Asm->getSymbol(F)), 4);
Asm->OutStreamer->EmitIntValue(1, 4);
@@ -257,14 +280,14 @@ void Win64Exception::emitCSpecificHandlerTable() {
const BlockAddress *BA = Handler.RecoverBA;
if (BA)
- createImageRel32(Asm->GetBlockAddressSymbol(BA)), 4);
+ create32bitRef(Asm->GetBlockAddressSymbol(BA)), 4);
Asm->OutStreamer->EmitIntValue(0, 4);
-void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) {
+void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
const Function *F = MF->getFunction();
const Function *ParentF = MMI->getWinEHParent(F);
auto &OS = *Asm->OutStreamer;
@@ -273,91 +296,26 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) {
StringRef ParentLinkageName =
- MCSymbol *FuncInfoXData =
- Asm->OutContext.getOrCreateSymbol(Twine("$cppxdata$", ParentLinkageName));
- OS.EmitValue(createImageRel32(FuncInfoXData), 4);
- // The Itanium LSDA table sorts similar landing pads together to simplify the
- // actions table, but we don't need that.
- SmallVector<const LandingPadInfo *, 64> LandingPads;
- const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
- LandingPads.reserve(PadInfos.size());
- for (const auto &LP : PadInfos)
- LandingPads.push_back(&LP);
- RangeMapType PadMap;
- computePadMap(LandingPads, PadMap);
- // The end label of the previous invoke or nounwind try-range.
- MCSymbol *LastLabel = Asm->getFunctionBegin();
- // Whether there is a potentially throwing instruction (currently this means
- // an ordinary call) between the end of the previous try-range and now.
- bool SawPotentiallyThrowing = false;
- int LastEHState = -2;
- // The parent function and the catch handlers contribute to the 'ip2state'
- // table.
- // Include ip2state entries for the beginning of the main function and
- // for catch handler functions.
- if (F == ParentF) {
- FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1));
- LastEHState = -1;
- } else if (FuncInfo.HandlerBaseState.count(F)) {
- FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel,
- FuncInfo.HandlerBaseState[F]));
- LastEHState = FuncInfo.HandlerBaseState[F];
- }
- for (const auto &MBB : *MF) {
- for (const auto &MI : MBB) {
- if (!MI.isEHLabel()) {
- if (MI.isCall())
- SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI);
- continue;
- }
- // End of the previous try-range?
- MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol();
- if (BeginLabel == LastLabel)
- SawPotentiallyThrowing = false;
- // Beginning of a new try-range?
- RangeMapType::const_iterator L = PadMap.find(BeginLabel);
- if (L == PadMap.end())
- // Nope, it was just some random label.
- continue;
- const PadRange &P = L->second;
- const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
- assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
- "Inconsistent landing pad map!");
- // FIXME: Should this be using FuncInfo.HandlerBaseState?
- if (SawPotentiallyThrowing && LastEHState != -1) {
- FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1));
- SawPotentiallyThrowing = false;
- LastEHState = -1;
- }
- if (LandingPad->WinEHState != LastEHState)
- FuncInfo.IPToStateList.push_back(
- std::make_pair(BeginLabel, LandingPad->WinEHState));
- LastEHState = LandingPad->WinEHState;
- LastLabel = LandingPad->EndLabels[P.RangeIndex];
- }
+ MCSymbol *FuncInfoXData = nullptr;
+ if (shouldEmitPersonality) {
+ FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
+ Twine("$cppxdata$", ParentLinkageName));
+ OS.EmitValue(create32bitRef(FuncInfoXData), 4);
+ extendIP2StateTable(MF, ParentF, FuncInfo);
+ // Defer emission until we've visited the parent function and all the catch
+ // handlers. Cleanups don't contribute to the ip2state table, so don't count
+ // them.
+ if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F))
+ return;
+ ++FuncInfo.NumIPToStateFuncsVisited;
+ if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size())
+ return;
+ } else {
+ FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(ParentLinkageName);
- // Defer emission until we've visited the parent function and all the catch
- // handlers. Cleanups don't contribute to the ip2state table yet, so don't
- // count them.
- if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F))
- return;
- ++FuncInfo.NumIPToStateFuncsVisited;
- if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size())
- return;
MCSymbol *UnwindMapXData = nullptr;
MCSymbol *TryBlockMapXData = nullptr;
MCSymbol *IPToStateXData = nullptr;
@@ -377,9 +335,9 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// UnwindMapEntry *UnwindMap;
// uint32_t NumTryBlocks;
// TryBlockMapEntry *TryBlockMap;
- // uint32_t IPMapEntries;
- // IPToStateMapEntry *IPToStateMap;
- // uint32_t UnwindHelp; // (x64/ARM only)
+ // uint32_t IPMapEntries; // always 0 for x86
+ // IPToStateMapEntry *IPToStateMap; // always 0 for x86
+ // uint32_t UnwindHelp; // non-x86 only
// ESTypeList *ESTypeList;
// int32_t EHFlags;
// }
@@ -389,12 +347,13 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) {
OS.EmitIntValue(0x19930522, 4); // MagicNumber
OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState
- OS.EmitValue(createImageRel32(UnwindMapXData), 4); // UnwindMap
+ OS.EmitValue(create32bitRef(UnwindMapXData), 4); // UnwindMap
OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); // NumTryBlocks
- OS.EmitValue(createImageRel32(TryBlockMapXData), 4); // TryBlockMap
+ OS.EmitValue(create32bitRef(TryBlockMapXData), 4); // TryBlockMap
OS.EmitIntValue(FuncInfo.IPToStateList.size(), 4); // IPMapEntries
- OS.EmitValue(createImageRel32(IPToStateXData), 4); // IPToStateMap
- OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp
+ OS.EmitValue(create32bitRef(IPToStateXData), 4); // IPToStateMap
+ if (Asm->MAI->usesWindowsCFI())
+ OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp
OS.EmitIntValue(0, 4); // ESTypeList
OS.EmitIntValue(1, 4); // EHFlags
@@ -406,7 +365,7 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) {
for (const WinEHUnwindMapEntry &UME : FuncInfo.UnwindMap) {
OS.EmitIntValue(UME.ToState, 4); // ToState
- OS.EmitValue(createImageRel32(UME.Cleanup), 4); // Action
+ OS.EmitValue(create32bitRef(UME.Cleanup), 4); // Action
@@ -443,7 +402,7 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) {
OS.EmitIntValue(TBME.TryHigh, 4); // TryHigh
OS.EmitIntValue(CatchHigh, 4); // CatchHigh
OS.EmitIntValue(TBME.HandlerArray.size(), 4); // NumCatches
- OS.EmitValue(createImageRel32(HandlerMapXData), 4); // HandlerArray
+ OS.EmitValue(create32bitRef(HandlerMapXData), 4); // HandlerArray
for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) {
@@ -460,12 +419,6 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// };
for (const WinEHHandlerType &HT : TBME.HandlerArray) {
- MCSymbol *ParentFrameOffset =
- Asm->OutContext.getOrCreateParentFrameOffsetSymbol(
- GlobalValue::getRealLinkageName(HT.Handler->getName()));
- const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::Create(
- ParentFrameOffset, MCSymbolRefExpr::VK_None, Asm->OutContext);
// Get the frame escape label with the offset of the catch object. If
// the index is -1, then there is no catch object, and we should emit an
// offset of zero, indicating that no copy will occur.
@@ -475,17 +428,25 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) {
- FrameAllocOffsetRef = MCSymbolRefExpr::Create(
+ FrameAllocOffsetRef = MCSymbolRefExpr::create(
FrameAllocOffset, MCSymbolRefExpr::VK_None, Asm->OutContext);
} else {
- FrameAllocOffsetRef = MCConstantExpr::Create(0, Asm->OutContext);
+ FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext);
OS.EmitIntValue(HT.Adjectives, 4); // Adjectives
- OS.EmitValue(createImageRel32(HT.TypeDescriptor), 4); // Type
+ OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4); // Type
OS.EmitValue(FrameAllocOffsetRef, 4); // CatchObjOffset
- OS.EmitValue(createImageRel32(HT.Handler), 4); // Handler
- OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset
+ OS.EmitValue(create32bitRef(HT.Handler), 4); // Handler
+ if (shouldEmitPersonality) {
+ MCSymbol *ParentFrameOffset =
+ Asm->OutContext.getOrCreateParentFrameOffsetSymbol(
+ GlobalValue::getRealLinkageName(HT.Handler->getName()));
+ const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::create(
+ ParentFrameOffset, MCSymbolRefExpr::VK_None, Asm->OutContext);
+ OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset
+ }
@@ -497,8 +458,86 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) {
if (IPToStateXData) {
for (auto &IPStatePair : FuncInfo.IPToStateList) {
- OS.EmitValue(createImageRel32(IPStatePair.first), 4); // IP
+ OS.EmitValue(create32bitRef(IPStatePair.first), 4); // IP
OS.EmitIntValue(IPStatePair.second, 4); // State
+void WinException::extendIP2StateTable(const MachineFunction *MF,
+ const Function *ParentF,
+ WinEHFuncInfo &FuncInfo) {
+ const Function *F = MF->getFunction();
+ // The Itanium LSDA table sorts similar landing pads together to simplify the
+ // actions table, but we don't need that.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ LandingPads.reserve(PadInfos.size());
+ for (const auto &LP : PadInfos)
+ LandingPads.push_back(&LP);
+ RangeMapType PadMap;
+ computePadMap(LandingPads, PadMap);
+ // The end label of the previous invoke or nounwind try-range.
+ MCSymbol *LastLabel = Asm->getFunctionBegin();
+ // Whether there is a potentially throwing instruction (currently this means
+ // an ordinary call) between the end of the previous try-range and now.
+ bool SawPotentiallyThrowing = false;
+ int LastEHState = -2;
+ // The parent function and the catch handlers contribute to the 'ip2state'
+ // table.
+ // Include ip2state entries for the beginning of the main function and
+ // for catch handler functions.
+ if (F == ParentF) {
+ FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1));
+ LastEHState = -1;
+ } else if (FuncInfo.HandlerBaseState.count(F)) {
+ FuncInfo.IPToStateList.push_back(
+ std::make_pair(LastLabel, FuncInfo.HandlerBaseState[F]));
+ LastEHState = FuncInfo.HandlerBaseState[F];
+ }
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (!MI.isEHLabel()) {
+ if (MI.isCall())
+ SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI);
+ continue;
+ }
+ // End of the previous try-range?
+ MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol();
+ if (BeginLabel == LastLabel)
+ SawPotentiallyThrowing = false;
+ // Beginning of a new try-range?
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
+ if (L == PadMap.end())
+ // Nope, it was just some random label.
+ continue;
+ const PadRange &P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+ // FIXME: Should this be using FuncInfo.HandlerBaseState?
+ if (SawPotentiallyThrowing && LastEHState != -1) {
+ FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1));
+ SawPotentiallyThrowing = false;
+ LastEHState = -1;
+ }
+ if (LandingPad->WinEHState != LastEHState)
+ FuncInfo.IPToStateList.push_back(
+ std::make_pair(BeginLabel, LandingPad->WinEHState));
+ LastEHState = LandingPad->WinEHState;
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ }
+ }
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.h b/lib/CodeGen/AsmPrinter/WinException.h
index 5f4237f..478899b 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.h
+++ b/lib/CodeGen/AsmPrinter/WinException.h
@@ -1,4 +1,4 @@
-//===-- Win64Exception.h - Windows Exception Handling ----------*- C++ -*--===//
+//===-- WinException.h - Windows Exception Handling ----------*- C++ -*--===//
// The LLVM Compiler Infrastructure
@@ -17,33 +17,41 @@
#include "EHStreamer.h"
namespace llvm {
+class Function;
class GlobalValue;
class MachineFunction;
class MCExpr;
+struct WinEHFuncInfo;
-class Win64Exception : public EHStreamer {
+class WinException : public EHStreamer {
/// Per-function flag to indicate if personality info should be emitted.
- bool shouldEmitPersonality;
+ bool shouldEmitPersonality = false;
/// Per-function flag to indicate if the LSDA should be emitted.
- bool shouldEmitLSDA;
+ bool shouldEmitLSDA = false;
/// Per-function flag to indicate if frame moves info should be emitted.
- bool shouldEmitMoves;
+ bool shouldEmitMoves = false;
+ /// True if this is a 64-bit target and we should use image relative offsets.
+ bool useImageRel32 = false;
void emitCSpecificHandlerTable();
void emitCXXFrameHandler3Table(const MachineFunction *MF);
- const MCExpr *createImageRel32(const MCSymbol *Value);
- const MCExpr *createImageRel32(const GlobalValue *GV);
+ void extendIP2StateTable(const MachineFunction *MF, const Function *ParentF,
+ WinEHFuncInfo &FuncInfo);
+ const MCExpr *create32bitRef(const MCSymbol *Value);
+ const MCExpr *create32bitRef(const GlobalValue *GV);
// Main entry points.
- Win64Exception(AsmPrinter *A);
- ~Win64Exception() override;
+ WinException(AsmPrinter *A);
+ ~WinException() override;
/// Emit all exception information that should come after the content.
void endModule() override;
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 9fc3e0b..6d2af90 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -71,6 +71,7 @@ add_llvm_library(LLVMCodeGen
+ MIRPrintingPass.cpp
@@ -129,3 +130,4 @@ add_dependencies(LLVMCodeGen intrinsics_gen)
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 2c1858b..6a81403 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -170,7 +170,8 @@ class TypePromotionTransaction;
void EliminateMostlyEmptyBlock(BasicBlock *BB);
bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT);
bool OptimizeInst(Instruction *I, bool& ModifiedDT);
- bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
+ bool OptimizeMemoryInst(Instruction *I, Value *Addr,
+ Type *AccessTy, unsigned AS);
bool OptimizeInlineAsmInst(CallInst *CS);
bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT);
bool MoveExtToFormExtLoad(Instruction *&I);
@@ -1410,11 +1411,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
if (TLI) {
+ // Unknown address space.
+ // TODO: Target hook to pick which address space the intrinsic cares
+ // about?
+ unsigned AddrSpace = ~0u;
SmallVector<Value*, 2> PtrOps;
Type *AccessTy;
- if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
+ if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace))
while (!PtrOps.empty())
- if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
+ if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
return true;
@@ -2095,6 +2100,7 @@ class AddressingModeMatcher {
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
/// the memory instruction that we're computing this address for.
Type *AccessTy;
+ unsigned AddrSpace;
Instruction *MemoryInst;
/// AddrMode - This is the addressing mode that we're building up. This is
@@ -2114,14 +2120,15 @@ class AddressingModeMatcher {
bool IgnoreProfitability;
AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
- const TargetMachine &TM, Type *AT, Instruction *MI,
- ExtAddrMode &AM, const SetOfInstrs &InsertedTruncs,
+ const TargetMachine &TM, Type *AT, unsigned AS,
+ Instruction *MI, ExtAddrMode &AM,
+ const SetOfInstrs &InsertedTruncs,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT)
: AddrModeInsts(AMI), TM(TM),
- AccessTy(AT), MemoryInst(MI), AddrMode(AM),
+ AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) {
IgnoreProfitability = false;
@@ -2135,7 +2142,7 @@ public:
/// optimizations.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p The ongoing transaction where every action should be registered.
- static ExtAddrMode Match(Value *V, Type *AccessTy,
+ static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
Instruction *MemoryInst,
SmallVectorImpl<Instruction*> &AddrModeInsts,
const TargetMachine &TM,
@@ -2144,7 +2151,7 @@ public:
TypePromotionTransaction &TPT) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy,
+ bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS,
MemoryInst, Result, InsertedTruncs,
PromotedInsts, TPT).MatchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -2190,7 +2197,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
TestAddrMode.ScaledReg = ScaleReg;
// If the new address isn't legal, bail out.
- if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
+ if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy, AddrSpace))
return false;
// It was legal, so commit it.
@@ -2207,7 +2214,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
// If this addressing mode is legal, commit it and remember that we folded
// this instruction.
- if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
+ if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy, AddrSpace)) {
AddrMode = TestAddrMode;
return true;
@@ -2771,7 +2778,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
// just add it to the disp field and check validity.
if (VariableOperand == -1) {
AddrMode.BaseOffs += ConstantOffset;
- if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
+ if (ConstantOffset == 0 ||
+ TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) {
// Check to see if we can fold the base pointer in too.
if (MatchAddr(AddrInst->getOperand(0), Depth+1))
return true;
@@ -2894,14 +2902,14 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
// Fold in immediates if legal for the target.
AddrMode.BaseOffs += CI->getSExtValue();
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.BaseOffs -= CI->getSExtValue();
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
// If this is a global variable, try to fold it into the addressing mode.
if (!AddrMode.BaseGV) {
AddrMode.BaseGV = GV;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.BaseGV = nullptr;
@@ -2945,7 +2953,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
AddrMode.HasBaseReg = true;
AddrMode.BaseReg = Addr;
// Still check for legality in case the target supports [imm] but not [i+r].
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.HasBaseReg = false;
AddrMode.BaseReg = nullptr;
@@ -2955,7 +2963,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
if (AddrMode.Scale == 0) {
AddrMode.Scale = 1;
AddrMode.ScaledReg = Addr;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.Scale = 0;
AddrMode.ScaledReg = nullptr;
@@ -3136,9 +3144,11 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// Get the access type of this use. If the use isn't a pointer, we don't
// know what it accesses.
Value *Address = User->getOperand(OpNo);
- if (!Address->getType()->isPointerTy())
+ PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
+ if (!AddrTy)
return false;
- Type *AddressAccessTy = Address->getType()->getPointerElementType();
+ Type *AddressAccessTy = AddrTy->getElementType();
+ unsigned AS = AddrTy->getAddressSpace();
// Do a match against the root of this address, ignoring profitability. This
// will tell us if the addressing mode for the memory operation will
@@ -3146,7 +3156,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode Result;
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy,
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS,
MemoryInst, Result, InsertedTruncs,
PromotedInsts, TPT);
Matcher.IgnoreProfitability = true;
@@ -3189,7 +3199,7 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
- Type *AccessTy) {
+ Type *AccessTy, unsigned AddrSpace) {
Value *Repl = Addr;
// Try to collapse single-value PHI nodes. This is necessary to undo
@@ -3229,8 +3239,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// For non-PHIs, determine the addressing mode being computed.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
- V, AccessTy, MemoryInst, NewAddrModeInsts, *TM, InsertedTruncsSet,
- PromotedInsts, TPT);
+ V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
+ InsertedTruncsSet, PromotedInsts, TPT);
// This check is broken into two cases with very similar code to avoid using
// getNumUses() as much as possible. Some values have a lot of uses, so
@@ -3545,7 +3555,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
Value *OpVal = CS->getArgOperand(ArgNo++);
- MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
+ MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
} else if (OpInfo.Type == InlineAsm::isInput)
@@ -4394,15 +4404,19 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
return OptimizeCmpExpression(CI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (TLI)
- return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+ if (TLI) {
+ unsigned AS = LI->getPointerAddressSpace();
+ return OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
+ }
return false;
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (TLI)
+ if (TLI) {
+ unsigned AS = SI->getPointerAddressSpace();
return OptimizeMemoryInst(I, SI->getOperand(1),
- SI->getOperand(0)->getType());
+ SI->getOperand(0)->getType(), AS);
+ }
return false;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 3d62d48..dba280f 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -71,7 +71,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// all callee-saved registers. In non-return this is any
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo *MFI = MF.getFrameInfo();
- BitVector Pristine = MFI->getPristineRegs(BB);
+ BitVector Pristine = MFI->getPristineRegs(MF);
for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
if (!IsReturnBlock && !Pristine.test(*I)) continue;
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 092b7f8..d3687b9 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -226,21 +226,21 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
// Check for any dependencies on Head instructions.
- for (MIOperands MO(I); MO.isValid(); ++MO) {
- if (MO->isRegMask()) {
+ for (const MachineOperand &MO : I->operands()) {
+ if (MO.isRegMask()) {
DEBUG(dbgs() << "Won't speculate regmask: " << *I);
return false;
- if (!MO->isReg())
+ if (!MO.isReg())
- unsigned Reg = MO->getReg();
+ unsigned Reg = MO.getReg();
// Remember clobbered regunits.
- if (MO->isDef() && TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (MO.isDef() && TargetRegisterInfo::isPhysicalRegister(Reg))
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
- if (!MO->readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!MO.readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg))
MachineInstr *DefMI = MRI->getVRegDef(Reg);
if (!DefMI || DefMI->getParent() != Head)
@@ -284,19 +284,19 @@ bool SSAIfConv::findInsertionPoint() {
// Update live regunits.
- for (MIOperands MO(I); MO.isValid(); ++MO) {
+ for (const MachineOperand &MO : I->operands()) {
// We're ignoring regmask operands. That is conservatively correct.
- if (!MO->isReg())
+ if (!MO.isReg())
- unsigned Reg = MO->getReg();
+ unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg))
// I clobbers Reg, so it isn't live before I.
- if (MO->isDef())
+ if (MO.isDef())
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
// Unless I reads Reg.
- if (MO->readsReg())
+ if (MO.readsReg())
// Anything read by I is live before I.
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 79de175..37b3bf1 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -124,6 +124,12 @@ namespace {
// for more information.
unsigned MaxOffset;
+ /// Whether we should try to optimize for size only.
+ /// Currently, this applies a dead simple heuristic: only consider globals
+ /// used in minsize functions for merging.
+ /// FIXME: This could learn about optsize, and be used in the cost model.
+ bool OnlyOptimizeForSize;
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
/// \brief Merge everything in \p Globals for which the corresponding bit
@@ -152,9 +158,10 @@ namespace {
static char ID; // Pass identification, replacement for typeid.
explicit GlobalMerge(const TargetMachine *TM = nullptr,
- unsigned MaximalOffset = 0)
+ unsigned MaximalOffset = 0,
+ bool OnlyOptimizeForSize = false)
: FunctionPass(ID), TM(TM), DL(TM->getDataLayout()),
- MaxOffset(MaximalOffset) {
+ MaxOffset(MaximalOffset), OnlyOptimizeForSize(OnlyOptimizeForSize) {
@@ -273,6 +280,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
// users, so look through ConstantExpr...
Use *UI, *UE;
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
+ if (CE->use_empty())
+ continue;
UI = &*CE->use_begin();
UE = nullptr;
} else if (isa<Instruction>(U.getUser())) {
@@ -290,6 +299,12 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Function *ParentFn = I->getParent()->getParent();
+ // If we're only optimizing for size, ignore non-minsize functions.
+ if (OnlyOptimizeForSize &&
+ !ParentFn->hasFnAttribute(Attribute::MinSize))
+ continue;
size_t UGSIdx = GlobalUsesByFunction[ParentFn];
// If this is the first global the basic block uses, map it to the set
@@ -585,6 +600,7 @@ bool GlobalMerge::doFinalization(Module &M) {
return false;
-Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset) {
- return new GlobalMerge(TM, Offset);
+Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset,
+ bool OnlyOptimizeForSize) {
+ return new GlobalMerge(TM, Offset, OnlyOptimizeForSize);
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 0d59c72..e861ceb 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -170,9 +170,12 @@ namespace {
bool PreRegAlloc;
bool MadeChange;
int FnNum;
+ std::function<bool(const Function &)> PredicateFtor;
static char ID;
- IfConverter() : MachineFunctionPass(ID), FnNum(-1) {
+ IfConverter(std::function<bool(const Function &)> Ftor = nullptr)
+ : MachineFunctionPass(ID), FnNum(-1), PredicateFtor(Ftor) {
@@ -270,6 +273,9 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+ if (PredicateFtor && !PredicateFtor(*MF.getFunction()))
+ return false;
const TargetSubtargetInfo &ST = MF.getSubtarget();
TLI = ST.getTargetLowering();
TII = ST.getInstrInfo();
@@ -1691,3 +1697,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
ToBBI.IsAnalyzed = false;
FromBBI.IsAnalyzed = false;
+FunctionPass *
+llvm::createIfConverter(std::function<bool(const Function &)> Ftor) {
+ return new IfConverter(Ftor);
diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt
index fee0347..05905d0 100644
--- a/lib/CodeGen/LLVMBuild.txt
+++ b/lib/CodeGen/LLVMBuild.txt
@@ -16,7 +16,7 @@
-subdirectories = AsmPrinter SelectionDAG
+subdirectories = AsmPrinter SelectionDAG MIRParser
type = Library
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 610c9f4..ff52058 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -150,12 +150,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(
return true;
if (StopAfter) {
- // FIXME: The intent is that this should eventually write out a YAML file,
- // containing the LLVM IR, the machine-level IR (when stopping after a
- // machine-level pass), and whatever other information is needed to
- // deserialize the code and resume compilation. For now, just write the
- // LLVM IR.
- PM.add(createPrintModulePass(Out));
+ PM.add(createPrintMIRPass(outs()));
return false;
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index adca4cc..c00b010 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -223,11 +223,11 @@ void LiveIntervals::computeRegMasks() {
RMB.first = RegMaskSlots.size();
for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end();
MI != ME; ++MI)
- for (MIOperands MO(MI); MO.isValid(); ++MO) {
- if (!MO->isRegMask())
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isRegMask())
- RegMaskBits.push_back(MO->getRegMask());
+ RegMaskBits.push_back(MO.getRegMask());
// Compute the number of register mask instructions in this block.
RMB.second = RegMaskSlots.size() - RMB.first;
@@ -927,23 +927,23 @@ public:
void updateAllRanges(MachineInstr *MI) {
DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI);
bool hasRegMask = false;
- for (MIOperands MO(MI); MO.isValid(); ++MO) {
- if (MO->isRegMask())
+ for (MachineOperand &MO : MI->operands()) {
+ if (MO.isRegMask())
hasRegMask = true;
- if (!MO->isReg())
+ if (!MO.isReg())
// Aggressively clear all kill flags.
// They are reinserted by VirtRegRewriter.
- if (MO->isUse())
- MO->setIsKill(false);
+ if (MO.isUse())
+ MO.setIsKill(false);
- unsigned Reg = MO->getReg();
+ unsigned Reg = MO.getReg();
if (!Reg)
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
LiveInterval &LI = LIS.getInterval(Reg);
if (LI.hasSubRanges()) {
- unsigned SubReg = MO->getSubReg();
+ unsigned SubReg = MO.getSubReg();
unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
for (LiveInterval::SubRange &S : LI.subranges()) {
if ((S.LaneMask & LaneMask) == 0)
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 27c57d5..08bbe0c 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -218,6 +218,22 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
return true;
+bool LiveRangeEdit::useIsKill(const LiveInterval &LI,
+ const MachineOperand &MO) const {
+ const MachineInstr *MI = MO.getParent();
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+ if (LI.Query(Idx).isKill())
+ return true;
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ unsigned SubReg = MO.getSubReg();
+ unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
+ for (const LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & LaneMask) != 0 && S.Query(Idx).isKill())
+ return true;
+ }
+ return false;
/// Find all live intervals that need to shrink, then remove the instruction.
void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
assert(MI->allDefsAreDead() && "Def isn't really dead");
@@ -266,9 +282,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// unlikely to change anything. We typically don't want to shrink the
// PIC base register that has lots of uses everywhere.
// Always shrink COPY uses that probably come from live range splitting.
- if (MI->readsVirtualRegister(Reg) &&
- (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) ||
- LI.Query(Idx).isKill()))
+ if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MOI->isDef())) ||
+ (MOI->readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, *MOI))))
// Remove defined value.
diff --git a/lib/CodeGen/MIRParser/CMakeLists.txt b/lib/CodeGen/MIRParser/CMakeLists.txt
new file mode 100644
index 0000000..468f072
--- /dev/null
+++ b/lib/CodeGen/MIRParser/CMakeLists.txt
@@ -0,0 +1,5 @@
+ MIRParser.cpp
+ )
+add_dependencies(LLVMMIRParser intrinsics_gen)
diff --git a/lib/CodeGen/MIRParser/LLVMBuild.txt b/lib/CodeGen/MIRParser/LLVMBuild.txt
new file mode 100644
index 0000000..04ae722
--- /dev/null
+++ b/lib/CodeGen/MIRParser/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/CodeGen/MIRParser/LLVMBuild.txt --------------------*- Conf -*--===;
+; The LLVM Compiler Infrastructure
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+; This is an LLVMBuild description file for the components in this subdirectory.
+; For more information on the LLVMBuild system, please see:
+type = Library
+name = MIRParser
+parent = CodeGen
+required_libraries = Core Support Target AsmParser CodeGen
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
new file mode 100644
index 0000000..7a51b38
--- /dev/null
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -0,0 +1,171 @@
+//===- MIRParser.cpp - MIR serialization format parser implementation -----===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file implements the class that parses the optional LLVM IR and machine
+// functions that are stored in MIR files.
+#include "llvm/CodeGen/MIRParser/MIRParser.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <memory>
+using namespace llvm;
+namespace {
+/// This class implements the parsing of LLVM IR that's embedded inside a MIR
+/// file.
+class MIRParserImpl {
+ SourceMgr SM;
+ StringRef Filename;
+ LLVMContext &Context;
+ MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename,
+ LLVMContext &Context);
+ /// Try to parse the optional LLVM module and the machine functions in the MIR
+ /// file.
+ ///
+ /// Return null if an error occurred.
+ std::unique_ptr<Module> parse(SMDiagnostic &Error);
+ /// Parse the machine function in the current YAML document.
+ ///
+ /// Return true if an error occurred.
+ bool parseMachineFunction(yaml::Input &In);
+ /// Return a MIR diagnostic converted from an LLVM assembly diagnostic.
+ SMDiagnostic diagFromLLVMAssemblyDiag(const SMDiagnostic &Error,
+ SMRange SourceRange);
+} // end anonymous namespace
+MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
+ StringRef Filename, LLVMContext &Context)
+ : SM(), Filename(Filename), Context(Context) {
+ SM.AddNewSourceBuffer(std::move(Contents), SMLoc());
+static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) {
+ *reinterpret_cast<SMDiagnostic *>(Context) = Diag;
+std::unique_ptr<Module> MIRParserImpl::parse(SMDiagnostic &Error) {
+ yaml::Input In(SM.getMemoryBuffer(SM.getMainFileID())->getBuffer(),
+ /*Ctxt=*/nullptr, handleYAMLDiag, &Error);
+ if (!In.setCurrentDocument()) {
+ if (!Error.getMessage().empty())
+ return nullptr;
+ // Create an empty module when the MIR file is empty.
+ return llvm::make_unique<Module>(Filename, Context);
+ }
+ std::unique_ptr<Module> M;
+ // Parse the block scalar manually so that we can return a unique pointer
+ // without having to go through YAML traits.
+ if (const auto *BSN =
+ dyn_cast_or_null<yaml::BlockScalarNode>(In.getCurrentNode())) {
+ M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error,
+ Context);
+ if (!M) {
+ Error = diagFromLLVMAssemblyDiag(Error, BSN->getSourceRange());
+ return M;
+ }
+ In.nextDocument();
+ if (!In.setCurrentDocument())
+ return M;
+ } else {
+ // Create a new, empty module.
+ M = llvm::make_unique<Module>(Filename, Context);
+ }
+ // Parse the machine functions.
+ do {
+ if (parseMachineFunction(In))
+ return nullptr;
+ In.nextDocument();
+ } while (In.setCurrentDocument());
+ return M;
+bool MIRParserImpl::parseMachineFunction(yaml::Input &In) {
+ yaml::MachineFunction MF;
+ yaml::yamlize(In, MF, false);
+ if (In.error())
+ return true;
+ // TODO: Initialize the real machine function with the state in the yaml
+ // machine function later on.
+ return false;
+SMDiagnostic MIRParserImpl::diagFromLLVMAssemblyDiag(const SMDiagnostic &Error,
+ SMRange SourceRange) {
+ assert(SourceRange.isValid());
+ // Translate the location of the error from the location in the llvm IR string
+ // to the corresponding location in the MIR file.
+ auto LineAndColumn = SM.getLineAndColumn(SourceRange.Start);
+ unsigned Line = LineAndColumn.first + Error.getLineNo() - 1;
+ unsigned Column = Error.getColumnNo();
+ StringRef LineStr = Error.getLineContents();
+ SMLoc Loc = Error.getLoc();
+ // Get the full line and adjust the column number by taking the indentation of
+ // LLVM IR into account.
+ for (line_iterator L(*SM.getMemoryBuffer(SM.getMainFileID()), false), E;
+ L != E; ++L) {
+ if (L.line_number() == Line) {
+ LineStr = *L;
+ Loc = SMLoc::getFromPointer(LineStr.data());
+ auto Indent = LineStr.find(Error.getLineContents());
+ if (Indent != StringRef::npos)
+ Column += Indent;
+ break;
+ }
+ }
+ return SMDiagnostic(SM, Loc, Filename, Line, Column, Error.getKind(),
+ Error.getMessage(), LineStr, Error.getRanges(),
+ Error.getFixIts());
+std::unique_ptr<Module> llvm::parseMIRFile(StringRef Filename,
+ SMDiagnostic &Error,
+ LLVMContext &Context) {
+ auto FileOrErr = MemoryBuffer::getFile(Filename);
+ if (std::error_code EC = FileOrErr.getError()) {
+ Error = SMDiagnostic(Filename, SourceMgr::DK_Error,
+ "Could not open input file: " + EC.message());
+ return std::unique_ptr<Module>();
+ }
+ return parseMIR(std::move(FileOrErr.get()), Error, Context);
+std::unique_ptr<Module> llvm::parseMIR(std::unique_ptr<MemoryBuffer> Contents,
+ SMDiagnostic &Error,
+ LLVMContext &Context) {
+ auto Filename = Contents->getBufferIdentifier();
+ MIRParserImpl Parser(std::move(Contents), Filename, Context);
+ return Parser.parse(Error);
diff --git a/lib/CodeGen/MIRParser/Makefile b/lib/CodeGen/MIRParser/Makefile
new file mode 100644
index 0000000..c02d188
--- /dev/null
+++ b/lib/CodeGen/MIRParser/Makefile
@@ -0,0 +1,13 @@
+##===- lib/CodeGen/MIRParser/Makefile ----------------------*- Makefile -*-===##
+# The LLVM Compiler Infrastructure
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+LEVEL = ../../..
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp
new file mode 100644
index 0000000..5e0f4cd
--- /dev/null
+++ b/lib/CodeGen/MIRPrintingPass.cpp
@@ -0,0 +1,109 @@
+//===- MIRPrintingPass.cpp - Pass that prints out using the MIR format ----===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file implements a pass that prints out the LLVM module using the MIR
+// serialization format.
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/YAMLTraits.h"
+using namespace llvm;
+namespace llvm {
+namespace yaml {
+/// This struct serializes the LLVM IR module.
+template <> struct BlockScalarTraits<Module> {
+ static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) {
+ Mod.print(OS, nullptr);
+ }
+ static StringRef input(StringRef Str, void *Ctxt, Module &Mod) {
+ llvm_unreachable("LLVM Module is supposed to be parsed separately");
+ return "";
+ }
+} // end namespace yaml
+} // end namespace llvm
+namespace {
+/// This class prints out the machine functions using the MIR serialization
+/// format.
+class MIRPrinter {
+ raw_ostream &OS;
+ MIRPrinter(raw_ostream &OS) : OS(OS) {}
+ void print(const MachineFunction &MF);
+void MIRPrinter::print(const MachineFunction &MF) {
+ yaml::MachineFunction YamlMF;
+ YamlMF.Name = MF.getName();
+ yaml::Output Out(OS);
+ Out << YamlMF;
+/// This pass prints out the LLVM IR to an output stream using the MIR
+/// serialization format.
+struct MIRPrintingPass : public MachineFunctionPass {
+ static char ID;
+ raw_ostream &OS;
+ std::string MachineFunctions;
+ MIRPrintingPass() : MachineFunctionPass(ID), OS(dbgs()) {}
+ MIRPrintingPass(raw_ostream &OS) : MachineFunctionPass(ID), OS(OS) {}
+ const char *getPassName() const override { return "MIR Printing Pass"; }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ virtual bool runOnMachineFunction(MachineFunction &MF) override {
+ std::string Str;
+ raw_string_ostream StrOS(Str);
+ MIRPrinter(StrOS).print(MF);
+ MachineFunctions.append(StrOS.str());
+ return false;
+ }
+ virtual bool doFinalization(Module &M) override {
+ yaml::Output Out(OS);
+ Out << M;
+ OS << MachineFunctions;
+ return false;
+ }
+char MIRPrintingPass::ID = 0;
+} // end anonymous namespace
+char &llvm::MIRPrintingPassID = MIRPrintingPass::ID;
+INITIALIZE_PASS(MIRPrintingPass, "mir-printer", "MIR Printer", false, false)
+namespace llvm {
+MachineFunctionPass *createPrintMIRPass(raw_ostream &OS) {
+ return new MIRPrintingPass(OS);
+} // end namespace llvm
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 43c80b7..a686341 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -54,7 +54,6 @@ namespace {
SourceMap &SrcMap,
DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
bool CopyPropagateBlock(MachineBasicBlock &MBB);
- void removeCopy(MachineInstr *MI);
char MachineCopyPropagation::ID = 0;
@@ -127,13 +126,6 @@ static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src,
return false;
-// Remove MI from the function because it has been determined it is dead.
-// Turn it into a noop KILL instruction as opposed to removing it to
-// maintain imp-use/imp-def chains.
-void MachineCopyPropagation::removeCopy(MachineInstr *MI) {
- MI->setDesc(TII->get(TargetOpcode::KILL));
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map
@@ -183,7 +175,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
I->clearRegisterKills(Def, TRI);
- removeCopy(MI);
+ MI->eraseFromParent();
Changed = true;
@@ -252,11 +244,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
report_fatal_error("MachineCopyPropagation should be run after"
" register allocation!");
- // Treat undef use like defs.
- // The backends are allowed to do whatever they want with undef value
- // and we cannot be sure this register will not be rewritten to break
- // some false dependencies for the hardware for instance.
- if (MO.isDef() || MO.isUndef()) {
+ if (MO.isDef()) {
@@ -270,6 +258,14 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
+ // Treat undef use like defs for copy propagation but not for
+ // dead copy. We would need to do a liveness check to be sure the copy
+ // is dead for undef uses.
+ // The backends are allowed to do whatever they want with undef value
+ // and we cannot be sure this register will not be rewritten to break
+ // some false dependencies for the hardware for instance.
+ if (MO.isUndef())
+ Defs.push_back(Reg);
// The instruction has a register mask operand which means that it clobbers
@@ -287,7 +283,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: ";
- removeCopy(*DI);
+ (*DI)->eraseFromParent();
Changed = true;
@@ -323,7 +319,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
DI != DE; ++DI) {
if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
- removeCopy(*DI);
+ (*DI)->eraseFromParent();
Changed = true;
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 8ec63f8..09662b6 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -584,12 +584,8 @@ int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
return -++NumFixedObjects;
-MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
- assert(MBB && "MBB must be valid");
- const MachineFunction *MF = MBB->getParent();
- assert(MF && "MBB must be part of a MachineFunction");
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
BitVector BV(TRI->getNumRegs());
// Before CSI is calculated, no registers are considered pristine. They can be
@@ -597,14 +593,10 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
if (!isCalleeSavedInfoValid())
return BV;
- for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
+ for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
- // Each MBB before the save point has all CSRs pristine.
- if (isBeforeSavePoint(*MF, *MBB))
- return BV;
- // On other MBBs the saved CSRs are not pristine.
+ // Saved CSRs are not pristine.
const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo();
for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
E = CSI.end(); I != E; ++I)
@@ -613,40 +605,6 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
return BV;
-// Note: We could use some sort of caching mecanism, but we lack the ability
-// to know when the cache is invalid, i.e., the CFG changed.
-// Assuming we have that, we can simply compute all the set of MBBs
-// that are before the save point.
-bool MachineFrameInfo::isBeforeSavePoint(const MachineFunction &MF,
- const MachineBasicBlock &MBB) const {
- // Early exit if shrink-wrapping did not kick.
- if (!Save)
- return &MBB == &MF.front();
- // Starting from MBB, check if there is a path leading to Save that do
- // not cross Restore.
- SmallPtrSet<const MachineBasicBlock *, 8> Visited;
- SmallVector<const MachineBasicBlock *, 8> WorkList;
- WorkList.push_back(&MBB);
- Visited.insert(&MBB);
- do {
- const MachineBasicBlock *CurBB = WorkList.pop_back_val();
- // By construction, the region that is after the save point is
- // dominated by the Save and post-dominated by the Restore.
- // If we do not reach Restore and still reach Save, this
- // means MBB is before Save.
- if (CurBB == Save)
- return true;
- if (CurBB == Restore)
- continue;
- // Enqueue all the successors not already visited.
- for (MachineBasicBlock *SuccBB : CurBB->successors())
- if (Visited.insert(SuccBB).second)
- WorkList.push_back(SuccBB);
- } while (!WorkList.empty());
- return false;
unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 205032f..e671028 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -1092,9 +1092,8 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg(
OpndIt.getOperandNo(), Reg, CurRC, TII, TRI);
// Otherwise, just check the current operands.
- for (ConstMIOperands OpndIt(this); OpndIt.isValid() && CurRC; ++OpndIt)
- CurRC = getRegClassConstraintEffectForVRegImpl(OpndIt.getOperandNo(), Reg,
- CurRC, TII, TRI);
+ for (unsigned i = 0, e = NumOperands; i < e && CurRC; ++i)
+ CurRC = getRegClassConstraintEffectForVRegImpl(i, Reg, CurRC, TII, TRI);
return CurRC;
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index 0690f08..cd820ee 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -23,11 +23,15 @@ namespace {
class UnpackMachineBundles : public MachineFunctionPass {
static char ID; // Pass identification
- UnpackMachineBundles() : MachineFunctionPass(ID) {
+ UnpackMachineBundles(std::function<bool(const Function &)> Ftor = nullptr)
+ : MachineFunctionPass(ID), PredicateFtor(Ftor) {
bool runOnMachineFunction(MachineFunction &MF) override;
+ private:
+ std::function<bool(const Function &)> PredicateFtor;
} // end anonymous namespace
@@ -37,6 +41,9 @@ INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles",
"Unpack machine instruction bundles", false, false)
bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ if (PredicateFtor && !PredicateFtor(*MF.getFunction()))
+ return false;
bool Changed = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
@@ -69,6 +76,10 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
return Changed;
+FunctionPass *
+llvm::createUnpackMachineBundles(std::function<bool(const Function &)> Ftor) {
+ return new UnpackMachineBundles(Ftor);
namespace {
class FinalizeMachineBundles : public MachineFunctionPass {
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 3967a2f..cce590c 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -1012,10 +1012,10 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
SmallVector<const MachineInstr*, 8> Work(1, MI);
do {
MI = Work.pop_back_val();
- for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
- if (!MO->isReg() || !MO->isDef())
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
- unsigned Reg = MO->getReg();
+ unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index d9da7bc..eec984f 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -114,7 +114,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
// Otherwise, this is a new entry, create a new symbol for it and add an
// entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
- BBCallbacks.push_back(BB);
+ BBCallbacks.emplace_back(BB);
Entry.Index = BBCallbacks.size()-1;
Entry.Fn = BB->getParent();
@@ -308,6 +308,7 @@ void MachineModuleInfo::EndFunction() {
// Clean up exception info.
+ PersonalityTypeCache = EHPersonality::Unknown;
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 5f03390..aed0e50 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -655,6 +655,10 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
if (!MI->isSafeToMove(AA, SawStore))
return false;
+ // Convergent operations may only be moved to control equivalent locations.
+ if (MI->isConvergent())
+ return false;
// FIXME: This should include support for sinking instructions within the
// block they are currently in to shorten the live ranges. We often get
// instructions sunk into the top of a large block, but it would be better to
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index e07250b..34ac9d5 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -627,10 +627,12 @@ static bool getDataDeps(const MachineInstr *UseMI,
SmallVectorImpl<DataDep> &Deps,
const MachineRegisterInfo *MRI) {
bool HasPhysRegs = false;
- for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
- if (!MO->isReg())
+ for (MachineInstr::const_mop_iterator I = UseMI->operands_begin(),
+ E = UseMI->operands_end(); I != E; ++I) {
+ const MachineOperand &MO = *I;
+ if (!MO.isReg())
- unsigned Reg = MO->getReg();
+ unsigned Reg = MO.getReg();
if (!Reg)
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -638,8 +640,8 @@ static bool getDataDeps(const MachineInstr *UseMI,
// Collect virtual register reads.
- if (MO->readsReg())
- Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
+ if (MO.readsReg())
+ Deps.push_back(DataDep(MRI, Reg, UseMI->getOperandNo(I)));
return HasPhysRegs;
@@ -690,28 +692,30 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
SmallVector<unsigned, 8> Kills;
SmallVector<unsigned, 8> LiveDefOps;
- for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
- if (!MO->isReg())
+ for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
+ ME = UseMI->operands_end(); MI != ME; ++MI) {
+ const MachineOperand &MO = *MI;
+ if (!MO.isReg())
- unsigned Reg = MO->getReg();
+ unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg))
// Track live defs and kills for updating RegUnits.
- if (MO->isDef()) {
- if (MO->isDead())
+ if (MO.isDef()) {
+ if (MO.isDead())
- LiveDefOps.push_back(MO.getOperandNo());
- } else if (MO->isKill())
+ LiveDefOps.push_back(UseMI->getOperandNo(MI));
+ } else if (MO.isKill())
// Identify dependencies.
- if (!MO->readsReg())
+ if (!MO.readsReg())
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
if (I == RegUnits.end())
- Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
+ Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI)));
@@ -864,15 +868,18 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
SmallVector<unsigned, 8> ReadOps;
- for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
- if (!MO->isReg())
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ const MachineOperand &MO = *MOI;
+ if (!MO.isReg())
- unsigned Reg = MO->getReg();
+ unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg))
- if (MO->readsReg())
- ReadOps.push_back(MO.getOperandNo());
- if (!MO->isDef())
+ if (MO.readsReg())
+ ReadOps.push_back(MI->getOperandNo(MOI));
+ if (!MO.isDef())
// This is a def of Reg. Remove corresponding entries from RegUnits, and
// update MI Height to consider the physreg dependencies.
@@ -885,7 +892,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
// We may not know the UseMI of this dependency, if it came from the
// live-in list. SchedModel can handle a NULL UseMI.
DepHeight += SchedModel
- .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op);
+ .computeOperandLatency(MI, MI->getOperandNo(MOI), I->MI, I->Op);
Height = std::max(Height, DepHeight);
// This regunit is dead above MI.
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index f5edcb7..ca35ec5 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -694,7 +694,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
const MachineFrameInfo *MFI = MF->getFrameInfo();
assert(MFI && "Function has no frame info");
- BitVector PR = MFI->getPristineRegs(MBB);
+ BitVector PR = MFI->getPristineRegs(*MF);
for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
diff --git a/lib/CodeGen/Makefile b/lib/CodeGen/Makefile
index 4ab3e3c..96f7ca5 100644
--- a/lib/CodeGen/Makefile
+++ b/lib/CodeGen/Makefile
@@ -9,7 +9,7 @@
LEVEL = ../..
-PARALLEL_DIRS = SelectionDAG AsmPrinter
+PARALLEL_DIRS = SelectionDAG AsmPrinter MIRParser
include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 6902243..4cd86e6 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -295,6 +295,24 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
if (verifyAfter)
+ // Add the passes after the pass P if there is any.
+ for (SmallVectorImpl<std::pair<AnalysisID, IdentifyingPassPtr> >::iterator
+ I = Impl->InsertedPasses.begin(),
+ E = Impl->InsertedPasses.end();
+ I != E; ++I) {
+ if ((*I).first == PassID) {
+ assert((*I).second.isValid() && "Illegal Pass ID!");
+ Pass *NP;
+ if ((*I).second.isInstance())
+ NP = (*I).second.getInstance();
+ else {
+ NP = Pass::createPass((*I).second.getID());
+ assert(NP && "Pass ID not registered");
+ }
+ addPass(NP, false, false);
+ }
+ }
} else {
delete P;
@@ -329,22 +347,6 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter,
AnalysisID FinalID = P->getPassID();
addPass(P, verifyAfter, printAfter); // Ends the lifetime of P.
- // Add the passes after the pass P if there is any.
- for (SmallVectorImpl<std::pair<AnalysisID, IdentifyingPassPtr> >::iterator
- I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end();
- I != E; ++I) {
- if ((*I).first == PassID) {
- assert((*I).second.isValid() && "Illegal Pass ID!");
- Pass *NP;
- if ((*I).second.isInstance())
- NP = (*I).second.getInstance();
- else {
- NP = Pass::createPass((*I).second.getID());
- assert(NP && "Pass ID not registered");
- }
- addPass(NP, false, false);
- }
- }
return FinalID;
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index b153800..5f81949 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -68,8 +68,8 @@ bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
!MI->isRegSequence() &&
return false;
- for (MIOperands MO(MI); MO.isValid(); ++MO)
- if (MO->isReg() && MO->isUse() && MO->readsReg())
+ for (const MachineOperand &MO : MI->operands())
+ if (MO.isReg() && MO.isUse() && MO.readsReg())
return false;
return true;
@@ -100,17 +100,17 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
bool Found = false;
for (++UserMI; UserMI != UserE; ++UserMI) {
- for (MIOperands MO(UserMI); MO.isValid(); ++MO) {
- if (!MO->isReg())
+ for (MachineOperand &MO : UserMI->operands()) {
+ if (!MO.isReg())
- unsigned UserReg = MO->getReg();
+ unsigned UserReg = MO.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(UserReg) ||
!TRI->regsOverlap(Reg, UserReg))
// UserMI uses or redefines Reg. Set <undef> flags on all uses.
Found = true;
- if (MO->isUse())
- MO->setIsUndef();
+ if (MO.isUse())
+ MO.setIsUndef();
if (Found)
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index ac7d98f..e513a4f 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -1834,12 +1834,12 @@ public:
unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
const {
unsigned L = 0;
- for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) {
- if (!MO->isReg() || MO->getReg() != Reg || !MO->isDef())
+ for (const MachineOperand &MO : DefMI->operands()) {
+ if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
L |= TRI->getSubRegIndexLaneMask(
- TRI->composeSubRegIndices(SubIdx, MO->getSubReg()));
- if (MO->readsReg())
+ TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
+ if (MO.readsReg())
Redef = true;
return L;
@@ -2224,13 +2224,13 @@ bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx,
unsigned Lanes) const {
if (MI->isDebugValue())
return false;
- for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
- if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg)
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
- if (!MO->readsReg())
+ if (!MO.readsReg())
if (Lanes & TRI->getSubRegIndexLaneMask(
- TRI->composeSubRegIndices(SubIdx, MO->getSubReg())))
+ TRI->composeSubRegIndices(SubIdx, MO.getSubReg())))
return true;
return false;
@@ -2339,11 +2339,11 @@ void JoinVals::pruneValues(JoinVals &Other,
// Remove <def,read-undef> flags. This def is now a partial redef.
// Also remove <def,dead> flags since the joined live range will
// continue past this instruction.
- for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
- MO.isValid(); ++MO) {
- if (MO->isReg() && MO->isDef() && MO->getReg() == Reg) {
- MO->setIsUndef(EraseImpDef);
- MO->setIsDead(false);
+ for (MachineOperand &MO :
+ Indexes->getInstructionFromIndex(Def)->operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
+ MO.setIsUndef(EraseImpDef);
+ MO.setIsDead(false);
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 667783e..450a305 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -60,11 +60,11 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dumpRegSetPressure(MaxSetPressure, TRI);
dbgs() << "Live In: ";
for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
- dbgs() << PrintReg(LiveInRegs[i], TRI) << " ";
+ dbgs() << PrintVRegOrUnit(LiveInRegs[i], TRI) << " ";
dbgs() << '\n';
dbgs() << "Live Out: ";
for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i)
- dbgs() << PrintReg(LiveOutRegs[i], TRI) << " ";
+ dbgs() << PrintVRegOrUnit(LiveOutRegs[i], TRI) << " ";
dbgs() << '\n';
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 7626dd2..a34bd63 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -55,7 +55,8 @@ void RegScavenger::initRegState() {
// Pristine CSRs are also unavailable.
- BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB);
+ const MachineFunction &MF = *MBB->getParent();
+ BitVector PR = MF.getFrameInfo()->getPristineRegs(MF);
for (int I = PR.find_first(); I>0; I = PR.find_next(I))
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index c60c518..e8e47b7 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1106,25 +1106,25 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
MachineBasicBlock::instr_iterator Begin = MI;
MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
while (Begin != End) {
- for (MIOperands MO(--End); MO.isValid(); ++MO) {
- if (!MO->isReg() || MO->isDef() || Reg != MO->getReg())
+ for (MachineOperand &MO : (--End)->operands()) {
+ if (!MO.isReg() || MO.isDef() || Reg != MO.getReg())
// DEBUG_VALUE nodes do not contribute to code generation and should
// always be ignored. Failure to do so may result in trying to modify
// KILL flags on DEBUG_VALUE nodes, which is distressing.
- if (MO->isDebug())
+ if (MO.isDebug())
// If the register has the internal flag then it could be killing an
// internal def of the register. In this case, just skip. We only want
// to toggle the flag on operands visible outside the bundle.
- if (MO->isInternalRead())
+ if (MO.isInternalRead())
- if (MO->isKill() == NewKillState)
+ if (MO.isKill() == NewKillState)
- MO->setIsKill(NewKillState);
+ MO.setIsKill(NewKillState);
if (NewKillState)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2c2dc85..a71c676 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -619,7 +619,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
- if (N0CFP->getValueAPF().isZero())
+ if (N0CFP->isZero())
return Op.getOperand(1);
// fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -1587,6 +1587,11 @@ static bool isNullConstant(SDValue V) {
return Const != nullptr && Const->isNullValue();
+static bool isNullFPConstant(SDValue V) {
+ ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
+ return Const != nullptr && Const->isZero() && !Const->isNegative();
static bool isAllOnesConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
return Const != nullptr && Const->isAllOnesValue();
@@ -4764,7 +4769,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ctlz c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
return SDValue();
@@ -4774,7 +4779,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ctlz_zero_undef c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
return SDValue();
@@ -4784,7 +4789,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (cttz c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
return SDValue();
@@ -4794,7 +4799,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (cttz_zero_undef c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
return SDValue();
@@ -4804,7 +4809,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ctpop c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
return SDValue();
@@ -7859,7 +7864,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
bool AllowNewConst = (Level < AfterLegalizeDAG);
// fold (fadd A, 0) -> A
- if (N1CFP && N1CFP->getValueAPF().isZero())
+ if (N1CFP && N1CFP->isZero())
return N0;
// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
@@ -7990,11 +7995,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
// (fsub A, 0) -> A
- if (N1CFP && N1CFP->getValueAPF().isZero())
+ if (N1CFP && N1CFP->isZero())
return N0;
// (fsub 0, B) -> -B
- if (N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (N0CFP && N0CFP->isZero()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
@@ -8060,7 +8065,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (Options.UnsafeFPMath) {
// fold (fmul A, 0) -> 0
- if (N1CFP && N1CFP->getValueAPF().isZero())
+ if (N1CFP && N1CFP->isZero())
return N1;
// fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
@@ -8776,7 +8781,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
// (fneg (fmul c, x)) -> (fmul -c, x)
- if (N0.getOpcode() == ISD::FMUL) {
+ if (N0.getOpcode() == ISD::FMUL &&
+ (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
if (CFP1) {
APFloat CVal = CFP1->getValueAPF();
@@ -9061,14 +9067,18 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
SelectionDAG &DAG,
const TargetLowering &TLI) {
+ unsigned AS;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
return false;
VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
return false;
VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
} else
return false;
@@ -9092,7 +9102,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
} else
return false;
- return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
+ return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()), AS);
/// Try turning a load/store into a pre-indexed load/store when the base
@@ -11908,9 +11918,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (Op.getOpcode() == ISD::UNDEF) continue;
// See if we can combine this build_vector into a blend with a zero vector.
- if (!VecIn2.getNode() && (isNullConstant(Op) ||
- (Op.getOpcode() == ISD::ConstantFP &&
- cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) {
+ if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
UsesZeroVector = true;
@@ -12988,7 +12996,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
N->getOpcode() == ISD::FDIV) {
if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP &&
- cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
+ cast<ConstantFPSDNode>(RHSOp.getNode())->isZero()))
@@ -13252,7 +13260,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// Check to see if we can simplify the select into an fabs node
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
// Allow either -0.0 or 0.0
- if (CFP->getValueAPF().isZero()) {
+ if (CFP->isZero()) {
// select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
N0 == N2 && N3.getOpcode() == ISD::FNEG &&
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 7b5b8c4..f3d75cb 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -80,33 +80,6 @@ static ISD::NodeType getPreferredExtendForValue(const Value *V) {
return ExtendKind;
-namespace {
-struct WinEHNumbering {
- WinEHNumbering(WinEHFuncInfo &FuncInfo) : FuncInfo(FuncInfo),
- CurrentBaseState(-1), NextState(0) {}
- WinEHFuncInfo &FuncInfo;
- int CurrentBaseState;
- int NextState;
- SmallVector<std::unique_ptr<ActionHandler>, 4> HandlerStack;
- SmallPtrSet<const Function *, 4> VisitedHandlers;
- int currentEHNumber() const {
- return HandlerStack.empty() ? CurrentBaseState : HandlerStack.back()->getEHState();
- }
- void createUnwindMapEntry(int ToState, ActionHandler *AH);
- void createTryBlockMapEntry(int TryLow, int TryHigh,
- ArrayRef<CatchHandler *> Handlers);
- void processCallSite(MutableArrayRef<std::unique_ptr<ActionHandler>> Actions,
- ImmutableCallSite CS);
- void popUnmatchedActions(int FirstMismatch);
- void calculateStateNumbers(const Function &F);
- void findActionRootLPads(const Function &F);
void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
SelectionDAG *DAG) {
Fn = &fn;
@@ -291,31 +264,18 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
if (!isMSVCEHPersonality(Personality))
- WinEHFuncInfo *EHInfo = nullptr;
if (Personality == EHPersonality::MSVC_Win64SEH) {
} else if (Personality == EHPersonality::MSVC_CXX) {
const Function *WinEHParentFn = MMI.getWinEHParent(&fn);
- EHInfo = &MMI.getWinEHFuncInfo(WinEHParentFn);
- if (EHInfo->LandingPadStateMap.empty()) {
- WinEHNumbering Num(*EHInfo);
- Num.findActionRootLPads(*WinEHParentFn);
- // The VisitedHandlers list is used by both findActionRootLPads and
- // calculateStateNumbers, but both functions need to visit all handlers.
- Num.VisitedHandlers.clear();
- Num.calculateStateNumbers(*WinEHParentFn);
- // Pop everything on the handler stack.
- // It may be necessary to call this more than once because a handler can
- // be pushed on the stack as a result of clearing the stack.
- while (!Num.HandlerStack.empty())
- Num.processCallSite(None, ImmutableCallSite());
- }
+ WinEHFuncInfo &EHInfo = MMI.getWinEHFuncInfo(WinEHParentFn);
+ calculateWinCXXEHStateNumbers(WinEHParentFn, EHInfo);
// Copy the state numbers to LandingPadInfo for the current function, which
// could be a handler or the parent.
for (const LandingPadInst *LP : LPads) {
MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()];
- MMI.addWinEHState(LPadMBB, EHInfo->LandingPadStateMap[LP]);
+ MMI.addWinEHState(LPadMBB, EHInfo.LandingPadStateMap[LP]);
@@ -358,334 +318,6 @@ void FunctionLoweringInfo::addSEHHandlersForLPads(
-void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) {
- WinEHUnwindMapEntry UME;
- UME.ToState = ToState;
- if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH))
- UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc());
- else
- UME.Cleanup = nullptr;
- FuncInfo.UnwindMap.push_back(UME);
-void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh,
- ArrayRef<CatchHandler *> Handlers) {
- // See if we already have an entry for this set of handlers.
- // This is using iterators rather than a range-based for loop because
- // if we find the entry we're looking for we'll need the iterator to erase it.
- int NumHandlers = Handlers.size();
- auto I = FuncInfo.TryBlockMap.begin();
- auto E = FuncInfo.TryBlockMap.end();
- for ( ; I != E; ++I) {
- auto &Entry = *I;
- if (Entry.HandlerArray.size() != (size_t)NumHandlers)
- continue;
- int N;
- for (N = 0; N < NumHandlers; ++N) {
- if (Entry.HandlerArray[N].Handler != Handlers[N]->getHandlerBlockOrFunc())
- break; // breaks out of inner loop
- }
- // If all the handlers match, this is what we were looking for.
- if (N == NumHandlers) {
- break;
- }
- }
- // If we found an existing entry for this set of handlers, extend the range
- // but move the entry to the end of the map vector. The order of entries
- // in the map is critical to the way that the runtime finds handlers.
- // FIXME: Depending on what has happened with block ordering, this may
- // incorrectly combine entries that should remain separate.
- if (I != E) {
- // Copy the existing entry.
- WinEHTryBlockMapEntry Entry = *I;
- Entry.TryLow = std::min(TryLow, Entry.TryLow);
- Entry.TryHigh = std::max(TryHigh, Entry.TryHigh);
- assert(Entry.TryLow <= Entry.TryHigh);
- // Erase the old entry and add this one to the back.
- FuncInfo.TryBlockMap.erase(I);
- FuncInfo.TryBlockMap.push_back(Entry);
- return;
- }
- // If we didn't find an entry, create a new one.
- WinEHTryBlockMapEntry TBME;
- TBME.TryLow = TryLow;
- TBME.TryHigh = TryHigh;
- assert(TBME.TryLow <= TBME.TryHigh);
- for (CatchHandler *CH : Handlers) {
- WinEHHandlerType HT;
- if (CH->getSelector()->isNullValue()) {
- HT.Adjectives = 0x40;
- HT.TypeDescriptor = nullptr;
- } else {
- auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts());
- // Selectors are always pointers to GlobalVariables with 'struct' type.
- // The struct has two fields, adjectives and a type descriptor.
- auto *CS = cast<ConstantStruct>(GV->getInitializer());
- HT.Adjectives =
- cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue();
- HT.TypeDescriptor =
- cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts());
- }
- HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc());
- HT.CatchObjRecoverIdx = CH->getExceptionVarIndex();
- TBME.HandlerArray.push_back(HT);
- }
- FuncInfo.TryBlockMap.push_back(TBME);
-static void print_name(const Value *V) {
-#ifndef NDEBUG
- if (!V) {
- DEBUG(dbgs() << "null");
- return;
- }
- if (const auto *F = dyn_cast<Function>(V))
- DEBUG(dbgs() << F->getName());
- else
- DEBUG(V->dump());
-void WinEHNumbering::processCallSite(
- MutableArrayRef<std::unique_ptr<ActionHandler>> Actions,
- ImmutableCallSite CS) {
- DEBUG(dbgs() << "processCallSite (EH state = " << currentEHNumber()
- << ") for: ");
- print_name(CS ? CS.getCalledValue() : nullptr);
- DEBUG(dbgs() << '\n');
- DEBUG(dbgs() << "HandlerStack: \n");
- for (int I = 0, E = HandlerStack.size(); I < E; ++I) {
- DEBUG(dbgs() << " ");
- print_name(HandlerStack[I]->getHandlerBlockOrFunc());
- DEBUG(dbgs() << '\n');
- }
- DEBUG(dbgs() << "Actions: \n");
- for (int I = 0, E = Actions.size(); I < E; ++I) {
- DEBUG(dbgs() << " ");
- print_name(Actions[I]->getHandlerBlockOrFunc());
- DEBUG(dbgs() << '\n');
- }
- int FirstMismatch = 0;
- for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E;
- ++FirstMismatch) {
- if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() !=
- Actions[FirstMismatch]->getHandlerBlockOrFunc())
- break;
- }
- // Remove unmatched actions from the stack and process their EH states.
- popUnmatchedActions(FirstMismatch);
- DEBUG(dbgs() << "Pushing actions for CallSite: ");
- print_name(CS ? CS.getCalledValue() : nullptr);
- DEBUG(dbgs() << '\n');
- bool LastActionWasCatch = false;
- const LandingPadInst *LastRootLPad = nullptr;
- for (size_t I = FirstMismatch; I != Actions.size(); ++I) {
- // We can reuse eh states when pushing two catches for the same invoke.
- bool CurrActionIsCatch = isa<CatchHandler>(Actions[I].get());
- auto *Handler = cast<Function>(Actions[I]->getHandlerBlockOrFunc());
- // Various conditions can lead to a handler being popped from the
- // stack and re-pushed later. That shouldn't create a new state.
- // FIXME: Can code optimization lead to re-used handlers?
- if (FuncInfo.HandlerEnclosedState.count(Handler)) {
- // If we already assigned the state enclosed by this handler re-use it.
- Actions[I]->setEHState(FuncInfo.HandlerEnclosedState[Handler]);
- continue;
- }
- const LandingPadInst* RootLPad = FuncInfo.RootLPad[Handler];
- if (CurrActionIsCatch && LastActionWasCatch && RootLPad == LastRootLPad) {
- DEBUG(dbgs() << "setEHState for handler to " << currentEHNumber() << "\n");
- Actions[I]->setEHState(currentEHNumber());
- } else {
- DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() << ", ");
- print_name(Actions[I]->getHandlerBlockOrFunc());
- DEBUG(dbgs() << ") with EH state " << NextState << "\n");
- createUnwindMapEntry(currentEHNumber(), Actions[I].get());
- DEBUG(dbgs() << "setEHState for handler to " << NextState << "\n");
- Actions[I]->setEHState(NextState);
- NextState++;
- }
- HandlerStack.push_back(std::move(Actions[I]));
- LastActionWasCatch = CurrActionIsCatch;
- LastRootLPad = RootLPad;
- }
- // This is used to defer numbering states for a handler until after the
- // last time it appears in an invoke action list.
- if (CS.isInvoke()) {
- for (int I = 0, E = HandlerStack.size(); I < E; ++I) {
- auto *Handler = cast<Function>(HandlerStack[I]->getHandlerBlockOrFunc());
- if (FuncInfo.LastInvoke[Handler] != cast<InvokeInst>(CS.getInstruction()))
- continue;
- FuncInfo.LastInvokeVisited[Handler] = true;
- DEBUG(dbgs() << "Last invoke of ");
- print_name(Handler);
- DEBUG(dbgs() << " has been visited.\n");
- }
- }
- DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: ");
- print_name(CS ? CS.getCalledValue() : nullptr);
- DEBUG(dbgs() << '\n');
-void WinEHNumbering::popUnmatchedActions(int FirstMismatch) {
- // Don't recurse while we are looping over the handler stack. Instead, defer
- // the numbering of the catch handlers until we are done popping.
- SmallVector<CatchHandler *, 4> PoppedCatches;
- for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) {
- std::unique_ptr<ActionHandler> Handler = HandlerStack.pop_back_val();
- if (isa<CatchHandler>(Handler.get()))
- PoppedCatches.push_back(cast<CatchHandler>(Handler.release()));
- }
- int TryHigh = NextState - 1;
- int LastTryLowIdx = 0;
- for (int I = 0, E = PoppedCatches.size(); I != E; ++I) {
- CatchHandler *CH = PoppedCatches[I];
- DEBUG(dbgs() << "Popped handler with state " << CH->getEHState() << "\n");
- if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) {
- int TryLow = CH->getEHState();
- auto Handlers =
- makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1);
- DEBUG(dbgs() << "createTryBlockMapEntry(" << TryLow << ", " << TryHigh);
- for (size_t J = 0; J < Handlers.size(); ++J) {
- DEBUG(dbgs() << ", ");
- print_name(Handlers[J]->getHandlerBlockOrFunc());
- }
- DEBUG(dbgs() << ")\n");
- createTryBlockMapEntry(TryLow, TryHigh, Handlers);
- LastTryLowIdx = I + 1;
- }
- }
- for (CatchHandler *CH : PoppedCatches) {
- if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) {
- if (FuncInfo.LastInvokeVisited[F]) {
- DEBUG(dbgs() << "Assigning base state " << NextState << " to ");
- print_name(F);
- DEBUG(dbgs() << '\n');
- FuncInfo.HandlerBaseState[F] = NextState;
- DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber()
- << ", null)\n");
- createUnwindMapEntry(currentEHNumber(), nullptr);
- ++NextState;
- calculateStateNumbers(*F);
- }
- else {
- DEBUG(dbgs() << "Deferring handling of ");
- print_name(F);
- DEBUG(dbgs() << " until last invoke visited.\n");
- }
- }
- delete CH;
- }
-void WinEHNumbering::calculateStateNumbers(const Function &F) {
- auto I = VisitedHandlers.insert(&F);
- if (!I.second)
- return; // We've already visited this handler, don't renumber it.
- int OldBaseState = CurrentBaseState;
- if (FuncInfo.HandlerBaseState.count(&F)) {
- CurrentBaseState = FuncInfo.HandlerBaseState[&F];
- }
- size_t SavedHandlerStackSize = HandlerStack.size();
- DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n');
- SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
- for (const BasicBlock &BB : F) {
- for (const Instruction &I : BB) {
- const auto *CI = dyn_cast<CallInst>(&I);
- if (!CI || CI->doesNotThrow())
- continue;
- processCallSite(None, CI);
- }
- const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
- if (!II)
- continue;
- const LandingPadInst *LPI = II->getLandingPadInst();
- auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
- if (!ActionsCall)
- continue;
- assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions);
- parseEHActions(ActionsCall, ActionList);
- if (ActionList.empty())
- continue;
- processCallSite(ActionList, II);
- ActionList.clear();
- FuncInfo.LandingPadStateMap[LPI] = currentEHNumber();
- DEBUG(dbgs() << "Assigning state " << currentEHNumber()
- << " to landing pad at " << LPI->getParent()->getName()
- << '\n');
- }
- // Pop any actions that were pushed on the stack for this function.
- popUnmatchedActions(SavedHandlerStackSize);
- DEBUG(dbgs() << "Assigning max state " << NextState - 1
- << " to " << F.getName() << '\n');
- FuncInfo.CatchHandlerMaxState[&F] = NextState - 1;
- CurrentBaseState = OldBaseState;
-// This function follows the same basic traversal as calculateStateNumbers
-// but it is necessary to identify the root landing pad associated
-// with each action before we start assigning state numbers.
-void WinEHNumbering::findActionRootLPads(const Function &F) {
- auto I = VisitedHandlers.insert(&F);
- if (!I.second)
- return; // We've already visited this handler, don't revisit it.
- SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
- for (const BasicBlock &BB : F) {
- const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
- if (!II)
- continue;
- const LandingPadInst *LPI = II->getLandingPadInst();
- auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
- if (!ActionsCall)
- continue;
- assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions);
- parseEHActions(ActionsCall, ActionList);
- if (ActionList.empty())
- continue;
- for (int I = 0, E = ActionList.size(); I < E; ++I) {
- if (auto *Handler
- = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) {
- FuncInfo.LastInvoke[Handler] = II;
- // Don't replace the root landing pad if we previously saw this
- // handler in a different function.
- if (FuncInfo.RootLPad.count(Handler) &&
- FuncInfo.RootLPad[Handler]->getParent()->getParent() != &F)
- continue;
- DEBUG(dbgs() << "Setting root lpad for ");
- print_name(Handler);
- DEBUG(dbgs() << " to " << LPI->getParent()->getName() << '\n');
- FuncInfo.RootLPad[Handler] = LPI;
- }
- }
- // Walk the actions again and look for nested handlers. This has to
- // happen after all of the actions have been processed in the current
- // function.
- for (int I = 0, E = ActionList.size(); I < E; ++I)
- if (auto *Handler
- = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc()))
- findActionRootLPads(*Handler);
- ActionList.clear();
- }
/// clear - Clear out all the function-specific state. This returns this
/// FunctionLoweringInfo to an empty state, ready to be used for a
/// different function.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index eeaebf78..96e2ff8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -602,10 +602,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
- SDValue Res = GetPromotedInteger(N->getOperand(0));
- SDValue Amt = N->getOperand(1);
- Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
- return DAG.getNode(ISD::SHL, SDLoc(N), Res.getValueType(), Res, Amt);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ LHS = GetPromotedInteger(LHS);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
+ return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS);
SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
@@ -625,19 +628,25 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
// The input value must be properly sign extended.
- SDValue Res = SExtPromotedInteger(N->getOperand(0));
- SDValue Amt = N->getOperand(1);
- Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
- return DAG.getNode(ISD::SRA, SDLoc(N), Res.getValueType(), Res, Amt);
+ if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ LHS = SExtPromotedInteger(LHS);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
+ return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS);
SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
// The input value must be properly zero extended.
- SDValue Res = ZExtPromotedInteger(N->getOperand(0));
- SDValue Amt = N->getOperand(1);
- Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
- return DAG.getNode(ISD::SRL, SDLoc(N), Res.getValueType(), Res, Amt);
+ if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ LHS = ZExtPromotedInteger(LHS);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
+ return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 3853ada..f4c7b59 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -71,7 +71,7 @@ SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
if (!SUnits.empty())
Addr = &SUnits[0];
- SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+ SUnits.emplace_back(N, (unsigned)SUnits.size());
assert((Addr == nullptr || Addr == &SUnits[0]) &&
"SUnits std::vector reallocated on the fly!");
SUnits.back().OrigNode = &SUnits.back();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index efd4bd9..cf51e75 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1810,6 +1810,13 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
return SDValue(N, 0);
+SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) {
+ if (VT == V.getValueType())
+ return V;
+ return getNode(ISD::BITCAST, SDLoc(V), VT, V);
/// getAddrSpaceCast - Return an AddrSpaceCastSDNode.
SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
unsigned SrcAS, unsigned DestAS) {
@@ -2425,6 +2432,19 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownOne = KnownOne.trunc(BitWidth);
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX: {
+ APInt Op0Zero, Op0One;
+ APInt Op1Zero, Op1One;
+ computeKnownBits(Op.getOperand(0), Op0Zero, Op0One, Depth);
+ computeKnownBits(Op.getOperand(1), Op1Zero, Op1One, Depth);
+ KnownZero = Op0Zero & Op1Zero;
+ KnownOne = Op0One & Op1One;
+ break;
+ }
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
if (unsigned Align = InferPtrAlignment(Op)) {
@@ -2528,7 +2548,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (Tmp == 1) return 1; // Early out.
Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
return std::min(Tmp, Tmp2);
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (Tmp == 1)
+ return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ return std::min(Tmp, Tmp2);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
@@ -2903,7 +2931,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
- case ISD::SINT_TO_FP: {
+ case ISD::SINT_TO_FP:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTPOP: {
EVT SVT = VT.getScalarType();
EVT InVT = BV->getValueType(0);
EVT InSVT = InVT.getScalarType();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 85303d2..8ba957d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1002,7 +1002,16 @@ bool SelectionDAGBuilder::findValue(const Value *V) const {
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
// If we already have an SDValue for this value, use it.
SDValue &N = NodeMap[V];
- if (N.getNode()) return N;
+ if (N.getNode()) {
+ if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
+ // Remove the debug location from the node as the node is about to be used
+ // in a location which may differ from the original debug location. This
+ // is relevant to Constant and ConstantFP nodes because they can appear
+ // as constant expressions inside PHI nodes.
+ N->setDebugLoc(DebugLoc());
+ }
+ return N;
+ }
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
@@ -2282,7 +2291,11 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector)
VT = TLI.getTypeToTransformTo(Ctx, VT);
- if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT)) {
+ if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) &&
+ // If the underlying comparison instruction is used by any other instruction,
+ // the consumed instructions won't be destroyed, so it is not profitable
+ // to convert to a min/max.
+ cast<SelectInst>(&I)->getCondition()->hasOneUse()) {
OpCode = Opc;
LHSVal = getValue(LHS);
RHSVal = getValue(RHS);
@@ -2848,7 +2861,17 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
- bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
+ // The IR notion of invariant_load only guarantees that all *non-faulting*
+ // invariant loads result in the same value. The MI notion of invariant load
+ // guarantees that the load can be legally moved to any location within its
+ // containing function. The MI notion of invariant_load is stronger than the
+ // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load
+ // with a guarantee that the location being loaded from is dereferenceable
+ // throughout the function's lifetime.
+ bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr &&
+ isDereferenceablePointer(SV, *DAG.getTarget().getDataLayout());
unsigned Alignment = I.getAlignment();
AAMDNodes AAInfo;
@@ -7437,7 +7460,7 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
JumpTableHeader JTH(Clusters[First].Low->getValue(),
Clusters[Last].High->getValue(), SI->getCondition(),
nullptr, false);
- JTCases.push_back(JumpTableBlock(JTH, JT));
+ JTCases.emplace_back(std::move(JTH), std::move(JT));
JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
JTCases.size() - 1, Weight);
@@ -7600,7 +7623,7 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
const int BitWidth =
- assert((High - Low + 1).sle(BitWidth) && "Case range must fit in bit mask!");
+ assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!");
if (Low.isNonNegative() && High.slt(BitWidth)) {
// Optimize the case where all the case values fit in a
@@ -7628,10 +7651,9 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
// Update Mask, Bits and ExtraWeight.
uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
- for (uint64_t j = Lo; j <= Hi; ++j) {
- CB->Mask |= 1ULL << j;
- CB->Bits++;
- }
+ assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
+ CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
+ CB->Bits += Hi - Lo + 1;
CB->ExtraWeight += Clusters[i].Weight;
TotalWeight += Clusters[i].Weight;
assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!");
@@ -7650,9 +7672,9 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight));
- BitTestCases.push_back(BitTestBlock(LowBound, CmpRange, SI->getCondition(),
- -1U, MVT::Other, false, nullptr,
- nullptr, std::move(BTI)));
+ BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
+ SI->getCondition(), -1U, MVT::Other, false, nullptr,
+ nullptr, std::move(BTI));
BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
BitTestCases.size() - 1, TotalWeight);
@@ -7746,8 +7768,10 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
Clusters[DstIndex++] = BitTestCluster;
} else {
- for (unsigned I = First; I <= Last; ++I)
- std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
+ size_t NumClusters = Last - First + 1;
+ std::memmove(&Clusters[DstIndex], &Clusters[First],
+ sizeof(Clusters[0]) * NumClusters);
+ DstIndex += NumClusters;
@@ -7783,22 +7807,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
const APInt &BigValue = Big.Low->getValue();
// Check that there is only one bit different.
- if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
- (SmallValue | BigValue) == BigValue) {
- // Isolate the common bit.
- APInt CommonBit = BigValue & ~SmallValue;
- assert((SmallValue | CommonBit) == BigValue &&
- CommonBit.countPopulation() == 1 && "Not a common bit?");
+ APInt CommonBit = BigValue ^ SmallValue;
+ if (CommonBit.isPowerOf2()) {
SDValue CondLHS = getValue(Cond);
EVT VT = CondLHS.getValueType();
SDLoc DL = getCurSDLoc();
SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
DAG.getConstant(CommonBit, DL, VT));
- SDValue Cond = DAG.getSetCC(DL, MVT::i1, Or,
- DAG.getConstant(BigValue, DL, VT),
+ SDValue Cond = DAG.getSetCC(
+ DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
// Update successor info.
// Both Small and Big will jump to Small.BB, so we sum up the weights.
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 2d4ab6c..8bbfa01 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -238,17 +238,6 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
SDValue ActualCallee = Builder.getValue(ISP.getActualCallee());
- // Handle immediate and symbolic callees.
- if (auto *ConstCallee = dyn_cast<ConstantSDNode>(ActualCallee.getNode()))
- ActualCallee = Builder.DAG.getIntPtrConstant(ConstCallee->getZExtValue(),
- Builder.getCurSDLoc(),
- /*isTarget=*/true);
- else if (auto *SymbolicCallee =
- dyn_cast<GlobalAddressSDNode>(ActualCallee.getNode()))
- ActualCallee = Builder.DAG.getTargetGlobalAddress(
- SymbolicCallee->getGlobal(), SDLoc(SymbolicCallee),
- SymbolicCallee->getValueType(0));
assert(CS.getCallingConv() != CallingConv::AnyReg &&
"anyregcc is not supported on statepoints!");
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 833da4b..9daf2a5 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -254,7 +254,7 @@ const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI,MCContext &Ctx) const{
// The normal PIC reloc base is the label at the start of the jump table.
- return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx);
+ return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index ffe59c1..1e8e03f 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -315,9 +315,9 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
// Create an expression to calculate the offset of the callsite from function
// entry.
- const MCExpr *CSOffsetExpr = MCBinaryExpr::CreateSub(
- MCSymbolRefExpr::Create(MILabel, OutContext),
- MCSymbolRefExpr::Create(AP.CurrentFnSymForSize, OutContext),
+ const MCExpr *CSOffsetExpr = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(MILabel, OutContext),
+ MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext),
CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations),
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 92488de..c809087 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -471,11 +471,13 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
MI->getOpcode() == TargetOpcode::PATCHPOINT) {
// Fold stackmap/patchpoint.
NewMI = foldPatchpoint(MF, MI, Ops, FI, *this);
+ if (NewMI)
+ MBB->insert(MI, NewMI);
} else {
// Ask the target to do the actual folding.
- NewMI =foldMemoryOperandImpl(MF, MI, Ops, FI);
+ NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI);
if (NewMI) {
NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// Add a memory operand, foldMemoryOperandImpl doesn't do that.
@@ -493,8 +495,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
NewMI->addMemOperand(MF, MMO);
- // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI.
- return MBB->insert(MI, NewMI);
+ return NewMI;
// Straight COPY may fold as load/store.
@@ -539,15 +540,15 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
isLoadFromStackSlot(LoadMI, FrameIndex)) {
// Fold stackmap/patchpoint.
NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
+ if (NewMI)
+ NewMI = MBB.insert(MI, NewMI);
} else {
// Ask the target to do the actual folding.
- NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
+ NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI);
if (!NewMI) return nullptr;
- NewMI = MBB.insert(MI, NewMI);
// Copy the memoperands from the load to the folded instruction.
if (MI->memoperands_empty()) {
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index b7f1db6..1bc89aa 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -1632,7 +1632,8 @@ TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
// The default implementation of this implements a conservative RISCy, r+r and
// r+i addr mode.
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index a32bdf8..d7b043d 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -30,7 +30,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
@@ -63,7 +63,8 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
const MCSymbol *Sym) const {
SmallString<64> NameData("DW.ref.");
NameData += Sym->getName();
- MCSymbol *Label = getContext().getOrCreateSymbol(NameData);
+ MCSymbolELF *Label =
+ cast<MCSymbolELF>(getContext().getOrCreateSymbol(NameData));
Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
StringRef Prefix = ".data.";
@@ -75,8 +76,8 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
- const MCExpr *E = MCConstantExpr::Create(Size, getContext());
- Streamer.EmitELFSize(Label, E);
+ const MCExpr *E = MCConstantExpr::create(Size, getContext());
+ Streamer.emitELFSize(Label, E);
Streamer.EmitSymbolValue(Sym, Size);
@@ -101,7 +102,7 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
return TargetLoweringObjectFile::
- getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()),
+ getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()),
Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
@@ -684,7 +685,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
return TargetLoweringObjectFile::
- getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()),
+ getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()),
Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
@@ -760,16 +761,16 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */);
const MCExpr *BSymExpr =
- MCSymbolRefExpr::Create(BaseSym, MCSymbolRefExpr::VK_None, Ctx);
+ MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx);
const MCExpr *LHS =
- MCSymbolRefExpr::Create(Stub, MCSymbolRefExpr::VK_None, Ctx);
+ MCSymbolRefExpr::create(Stub, MCSymbolRefExpr::VK_None, Ctx);
if (!Offset)
- return MCBinaryExpr::CreateSub(LHS, BSymExpr, Ctx);
+ return MCBinaryExpr::createSub(LHS, BSymExpr, Ctx);
const MCExpr *RHS =
- MCBinaryExpr::CreateAdd(BSymExpr, MCConstantExpr::Create(Offset, Ctx), Ctx);
- return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ MCBinaryExpr::createAdd(BSymExpr, MCConstantExpr::create(Offset, Ctx), Ctx);
+ return MCBinaryExpr::createSub(LHS, RHS, Ctx);
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 9fb1b5b..32d5100 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -417,17 +417,11 @@ void VirtRegRewriter::rewrite() {
// Finally, remove any identity copies.
if (MI->isIdentityCopy()) {
- if (MI->getNumOperands() == 2) {
- DEBUG(dbgs() << "Deleting identity copy.\n");
- if (Indexes)
- Indexes->removeMachineInstrFromMaps(MI);
- // It's safe to erase MI because MII has already been incremented.
- MI->eraseFromParent();
- } else {
- // Transform identity copy to a KILL to deal with subregisters.
- MI->setDesc(TII->get(TargetOpcode::KILL));
- DEBUG(dbgs() << "Identity copy: " << *MI);
- }
+ DEBUG(dbgs() << "Deleting identity copy.\n");
+ if (Indexes)
+ Indexes->removeMachineInstrFromMaps(MI);
+ // It's safe to erase MI because MII has already been incremented.
+ MI->eraseFromParent();
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 7246e1c..c2b3d84 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -2480,3 +2480,377 @@ void llvm::parseEHActions(
std::reverse(Actions.begin(), Actions.end());
+namespace {
+struct WinEHNumbering {
+ WinEHNumbering(WinEHFuncInfo &FuncInfo) : FuncInfo(FuncInfo),
+ CurrentBaseState(-1), NextState(0) {}
+ WinEHFuncInfo &FuncInfo;
+ int CurrentBaseState;
+ int NextState;
+ SmallVector<std::unique_ptr<ActionHandler>, 4> HandlerStack;
+ SmallPtrSet<const Function *, 4> VisitedHandlers;
+ int currentEHNumber() const {
+ return HandlerStack.empty() ? CurrentBaseState : HandlerStack.back()->getEHState();
+ }
+ void createUnwindMapEntry(int ToState, ActionHandler *AH);
+ void createTryBlockMapEntry(int TryLow, int TryHigh,
+ ArrayRef<CatchHandler *> Handlers);
+ void processCallSite(MutableArrayRef<std::unique_ptr<ActionHandler>> Actions,
+ ImmutableCallSite CS);
+ void popUnmatchedActions(int FirstMismatch);
+ void calculateStateNumbers(const Function &F);
+ void findActionRootLPads(const Function &F);
+void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) {
+ WinEHUnwindMapEntry UME;
+ UME.ToState = ToState;
+ if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH))
+ UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc());
+ else
+ UME.Cleanup = nullptr;
+ FuncInfo.UnwindMap.push_back(UME);
+void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh,
+ ArrayRef<CatchHandler *> Handlers) {
+ // See if we already have an entry for this set of handlers.
+ // This is using iterators rather than a range-based for loop because
+ // if we find the entry we're looking for we'll need the iterator to erase it.
+ int NumHandlers = Handlers.size();
+ auto I = FuncInfo.TryBlockMap.begin();
+ auto E = FuncInfo.TryBlockMap.end();
+ for ( ; I != E; ++I) {
+ auto &Entry = *I;
+ if (Entry.HandlerArray.size() != (size_t)NumHandlers)
+ continue;
+ int N;
+ for (N = 0; N < NumHandlers; ++N) {
+ if (Entry.HandlerArray[N].Handler != Handlers[N]->getHandlerBlockOrFunc())
+ break; // breaks out of inner loop
+ }
+ // If all the handlers match, this is what we were looking for.
+ if (N == NumHandlers) {
+ break;
+ }
+ }
+ // If we found an existing entry for this set of handlers, extend the range
+ // but move the entry to the end of the map vector. The order of entries
+ // in the map is critical to the way that the runtime finds handlers.
+ // FIXME: Depending on what has happened with block ordering, this may
+ // incorrectly combine entries that should remain separate.
+ if (I != E) {
+ // Copy the existing entry.
+ WinEHTryBlockMapEntry Entry = *I;
+ Entry.TryLow = std::min(TryLow, Entry.TryLow);
+ Entry.TryHigh = std::max(TryHigh, Entry.TryHigh);
+ assert(Entry.TryLow <= Entry.TryHigh);
+ // Erase the old entry and add this one to the back.
+ FuncInfo.TryBlockMap.erase(I);
+ FuncInfo.TryBlockMap.push_back(Entry);
+ return;
+ }
+ // If we didn't find an entry, create a new one.
+ WinEHTryBlockMapEntry TBME;
+ TBME.TryLow = TryLow;
+ TBME.TryHigh = TryHigh;
+ assert(TBME.TryLow <= TBME.TryHigh);
+ for (CatchHandler *CH : Handlers) {
+ WinEHHandlerType HT;
+ if (CH->getSelector()->isNullValue()) {
+ HT.Adjectives = 0x40;
+ HT.TypeDescriptor = nullptr;
+ } else {
+ auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts());
+ // Selectors are always pointers to GlobalVariables with 'struct' type.
+ // The struct has two fields, adjectives and a type descriptor.
+ auto *CS = cast<ConstantStruct>(GV->getInitializer());
+ HT.Adjectives =
+ cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue();
+ HT.TypeDescriptor =
+ cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts());
+ }
+ HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc());
+ HT.CatchObjRecoverIdx = CH->getExceptionVarIndex();
+ TBME.HandlerArray.push_back(HT);
+ }
+ FuncInfo.TryBlockMap.push_back(TBME);
+static void print_name(const Value *V) {
+#ifndef NDEBUG
+ if (!V) {
+ DEBUG(dbgs() << "null");
+ return;
+ }
+ if (const auto *F = dyn_cast<Function>(V))
+ DEBUG(dbgs() << F->getName());
+ else
+ DEBUG(V->dump());
+void WinEHNumbering::processCallSite(
+ MutableArrayRef<std::unique_ptr<ActionHandler>> Actions,
+ ImmutableCallSite CS) {
+ DEBUG(dbgs() << "processCallSite (EH state = " << currentEHNumber()
+ << ") for: ");
+ print_name(CS ? CS.getCalledValue() : nullptr);
+ DEBUG(dbgs() << '\n');
+ DEBUG(dbgs() << "HandlerStack: \n");
+ for (int I = 0, E = HandlerStack.size(); I < E; ++I) {
+ DEBUG(dbgs() << " ");
+ print_name(HandlerStack[I]->getHandlerBlockOrFunc());
+ DEBUG(dbgs() << '\n');
+ }
+ DEBUG(dbgs() << "Actions: \n");
+ for (int I = 0, E = Actions.size(); I < E; ++I) {
+ DEBUG(dbgs() << " ");
+ print_name(Actions[I]->getHandlerBlockOrFunc());
+ DEBUG(dbgs() << '\n');
+ }
+ int FirstMismatch = 0;
+ for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E;
+ ++FirstMismatch) {
+ if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() !=
+ Actions[FirstMismatch]->getHandlerBlockOrFunc())
+ break;
+ }
+ // Remove unmatched actions from the stack and process their EH states.
+ popUnmatchedActions(FirstMismatch);
+ DEBUG(dbgs() << "Pushing actions for CallSite: ");
+ print_name(CS ? CS.getCalledValue() : nullptr);
+ DEBUG(dbgs() << '\n');
+ bool LastActionWasCatch = false;
+ const LandingPadInst *LastRootLPad = nullptr;
+ for (size_t I = FirstMismatch; I != Actions.size(); ++I) {
+ // We can reuse eh states when pushing two catches for the same invoke.
+ bool CurrActionIsCatch = isa<CatchHandler>(Actions[I].get());
+ auto *Handler = cast<Function>(Actions[I]->getHandlerBlockOrFunc());
+ // Various conditions can lead to a handler being popped from the
+ // stack and re-pushed later. That shouldn't create a new state.
+ // FIXME: Can code optimization lead to re-used handlers?
+ if (FuncInfo.HandlerEnclosedState.count(Handler)) {
+ // If we already assigned the state enclosed by this handler re-use it.
+ Actions[I]->setEHState(FuncInfo.HandlerEnclosedState[Handler]);
+ continue;
+ }
+ const LandingPadInst* RootLPad = FuncInfo.RootLPad[Handler];
+ if (CurrActionIsCatch && LastActionWasCatch && RootLPad == LastRootLPad) {
+ DEBUG(dbgs() << "setEHState for handler to " << currentEHNumber() << "\n");
+ Actions[I]->setEHState(currentEHNumber());
+ } else {
+ DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() << ", ");
+ print_name(Actions[I]->getHandlerBlockOrFunc());
+ DEBUG(dbgs() << ") with EH state " << NextState << "\n");
+ createUnwindMapEntry(currentEHNumber(), Actions[I].get());
+ DEBUG(dbgs() << "setEHState for handler to " << NextState << "\n");
+ Actions[I]->setEHState(NextState);
+ NextState++;
+ }
+ HandlerStack.push_back(std::move(Actions[I]));
+ LastActionWasCatch = CurrActionIsCatch;
+ LastRootLPad = RootLPad;
+ }
+ // This is used to defer numbering states for a handler until after the
+ // last time it appears in an invoke action list.
+ if (CS.isInvoke()) {
+ for (int I = 0, E = HandlerStack.size(); I < E; ++I) {
+ auto *Handler = cast<Function>(HandlerStack[I]->getHandlerBlockOrFunc());
+ if (FuncInfo.LastInvoke[Handler] != cast<InvokeInst>(CS.getInstruction()))
+ continue;
+ FuncInfo.LastInvokeVisited[Handler] = true;
+ DEBUG(dbgs() << "Last invoke of ");
+ print_name(Handler);
+ DEBUG(dbgs() << " has been visited.\n");
+ }
+ }
+ DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: ");
+ print_name(CS ? CS.getCalledValue() : nullptr);
+ DEBUG(dbgs() << '\n');
+void WinEHNumbering::popUnmatchedActions(int FirstMismatch) {
+ // Don't recurse while we are looping over the handler stack. Instead, defer
+ // the numbering of the catch handlers until we are done popping.
+ SmallVector<CatchHandler *, 4> PoppedCatches;
+ for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) {
+ std::unique_ptr<ActionHandler> Handler = HandlerStack.pop_back_val();
+ if (isa<CatchHandler>(Handler.get()))
+ PoppedCatches.push_back(cast<CatchHandler>(Handler.release()));
+ }
+ int TryHigh = NextState - 1;
+ int LastTryLowIdx = 0;
+ for (int I = 0, E = PoppedCatches.size(); I != E; ++I) {
+ CatchHandler *CH = PoppedCatches[I];
+ DEBUG(dbgs() << "Popped handler with state " << CH->getEHState() << "\n");
+ if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) {
+ int TryLow = CH->getEHState();
+ auto Handlers =
+ makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1);
+ DEBUG(dbgs() << "createTryBlockMapEntry(" << TryLow << ", " << TryHigh);
+ for (size_t J = 0; J < Handlers.size(); ++J) {
+ DEBUG(dbgs() << ", ");
+ print_name(Handlers[J]->getHandlerBlockOrFunc());
+ }
+ DEBUG(dbgs() << ")\n");
+ createTryBlockMapEntry(TryLow, TryHigh, Handlers);
+ LastTryLowIdx = I + 1;
+ }
+ }
+ for (CatchHandler *CH : PoppedCatches) {
+ if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) {
+ if (FuncInfo.LastInvokeVisited[F]) {
+ DEBUG(dbgs() << "Assigning base state " << NextState << " to ");
+ print_name(F);
+ DEBUG(dbgs() << '\n');
+ FuncInfo.HandlerBaseState[F] = NextState;
+ DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber()
+ << ", null)\n");
+ createUnwindMapEntry(currentEHNumber(), nullptr);
+ ++NextState;
+ calculateStateNumbers(*F);
+ }
+ else {
+ DEBUG(dbgs() << "Deferring handling of ");
+ print_name(F);
+ DEBUG(dbgs() << " until last invoke visited.\n");
+ }
+ }
+ delete CH;
+ }
+void WinEHNumbering::calculateStateNumbers(const Function &F) {
+ auto I = VisitedHandlers.insert(&F);
+ if (!I.second)
+ return; // We've already visited this handler, don't renumber it.
+ int OldBaseState = CurrentBaseState;
+ if (FuncInfo.HandlerBaseState.count(&F)) {
+ CurrentBaseState = FuncInfo.HandlerBaseState[&F];
+ }
+ size_t SavedHandlerStackSize = HandlerStack.size();
+ DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n');
+ SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
+ for (const BasicBlock &BB : F) {
+ for (const Instruction &I : BB) {
+ const auto *CI = dyn_cast<CallInst>(&I);
+ if (!CI || CI->doesNotThrow())
+ continue;
+ processCallSite(None, CI);
+ }
+ const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
+ if (!II)
+ continue;
+ const LandingPadInst *LPI = II->getLandingPadInst();
+ auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
+ if (!ActionsCall)
+ continue;
+ assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions);
+ parseEHActions(ActionsCall, ActionList);
+ if (ActionList.empty())
+ continue;
+ processCallSite(ActionList, II);
+ ActionList.clear();
+ FuncInfo.LandingPadStateMap[LPI] = currentEHNumber();
+ DEBUG(dbgs() << "Assigning state " << currentEHNumber()
+ << " to landing pad at " << LPI->getParent()->getName()
+ << '\n');
+ }
+ // Pop any actions that were pushed on the stack for this function.
+ popUnmatchedActions(SavedHandlerStackSize);
+ DEBUG(dbgs() << "Assigning max state " << NextState - 1
+ << " to " << F.getName() << '\n');
+ FuncInfo.CatchHandlerMaxState[&F] = NextState - 1;
+ CurrentBaseState = OldBaseState;
+// This function follows the same basic traversal as calculateStateNumbers
+// but it is necessary to identify the root landing pad associated
+// with each action before we start assigning state numbers.
+void WinEHNumbering::findActionRootLPads(const Function &F) {
+ auto I = VisitedHandlers.insert(&F);
+ if (!I.second)
+ return; // We've already visited this handler, don't revisit it.
+ SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
+ for (const BasicBlock &BB : F) {
+ const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
+ if (!II)
+ continue;
+ const LandingPadInst *LPI = II->getLandingPadInst();
+ auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
+ if (!ActionsCall)
+ continue;
+ assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions);
+ parseEHActions(ActionsCall, ActionList);
+ if (ActionList.empty())
+ continue;
+ for (int I = 0, E = ActionList.size(); I < E; ++I) {
+ if (auto *Handler
+ = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) {
+ FuncInfo.LastInvoke[Handler] = II;
+ // Don't replace the root landing pad if we previously saw this
+ // handler in a different function.
+ if (FuncInfo.RootLPad.count(Handler) &&
+ FuncInfo.RootLPad[Handler]->getParent()->getParent() != &F)
+ continue;
+ DEBUG(dbgs() << "Setting root lpad for ");
+ print_name(Handler);
+ DEBUG(dbgs() << " to " << LPI->getParent()->getName() << '\n');
+ FuncInfo.RootLPad[Handler] = LPI;
+ }
+ }
+ // Walk the actions again and look for nested handlers. This has to
+ // happen after all of the actions have been processed in the current
+ // function.
+ for (int I = 0, E = ActionList.size(); I < E; ++I)
+ if (auto *Handler
+ = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc()))
+ findActionRootLPads(*Handler);
+ ActionList.clear();
+ }
+void llvm::calculateWinCXXEHStateNumbers(const Function *ParentFn,
+ WinEHFuncInfo &FuncInfo) {
+ // Return if it's already been done.
+ if (!FuncInfo.LandingPadStateMap.empty())
+ return;
+ WinEHNumbering Num(FuncInfo);
+ Num.findActionRootLPads(*ParentFn);
+ // The VisitedHandlers list is used by both findActionRootLPads and
+ // calculateStateNumbers, but both functions need to visit all handlers.
+ Num.VisitedHandlers.clear();
+ Num.calculateStateNumbers(*ParentFn);
+ // Pop everything on the handler stack.
+ // It may be necessary to call this more than once because a handler can
+ // be pushed on the stack as a result of clearing the stack.
+ while (!Num.HandlerStack.empty())
+ Num.processCallSite(None, ImmutableCallSite());
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index cc2fbbd..baab387 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -352,7 +352,7 @@ void DWARFContext::parseTypeUnits() {
if (!TUs.empty())
for (const auto &I : getTypesSections()) {
- TUs.push_back(DWARFUnitSection<DWARFTypeUnit>());
+ TUs.emplace_back();
TUs.back().parse(*this, I.second);
@@ -365,7 +365,7 @@ void DWARFContext::parseDWOTypeUnits() {
if (!DWOTUs.empty())
for (const auto &I : getTypesDWOSections()) {
- DWOTUs.push_back(DWARFUnitSection<DWARFTypeUnit>());
+ DWOTUs.emplace_back();
DWOTUs.back().parseDWO(*this, I.second);
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index b63af6a..a0bee0d 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -25,6 +25,7 @@ void DWARFDebugLine::Prologue::clear() {
TotalLength = Version = PrologueLength = 0;
MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0;
OpcodeBase = 0;
+ IsDWARF64 = false;
@@ -32,9 +33,9 @@ void DWARFDebugLine::Prologue::clear() {
void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
OS << "Line table prologue:\n"
- << format(" total_length: 0x%8.8x\n", TotalLength)
+ << format(" total_length: 0x%8.8" PRIx64 "\n", TotalLength)
<< format(" version: %u\n", Version)
- << format(" prologue_length: 0x%8.8x\n", PrologueLength)
+ << format(" prologue_length: 0x%8.8" PRIx64 "\n", PrologueLength)
<< format(" min_inst_length: %u\n", MinInstLength)
<< format(Version >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst)
<< format(" default_is_stmt: %u\n", DefaultIsStmt)
@@ -67,16 +68,23 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
bool DWARFDebugLine::Prologue::parse(DataExtractor debug_line_data,
uint32_t *offset_ptr) {
- const uint32_t prologue_offset = *offset_ptr;
+ const uint64_t prologue_offset = *offset_ptr;
TotalLength = debug_line_data.getU32(offset_ptr);
+ if (TotalLength == UINT32_MAX) {
+ IsDWARF64 = true;
+ TotalLength = debug_line_data.getU64(offset_ptr);
+ } else if (TotalLength > 0xffffff00) {
+ return false;
+ }
Version = debug_line_data.getU16(offset_ptr);
if (Version < 2)
return false;
- PrologueLength = debug_line_data.getU32(offset_ptr);
- const uint32_t end_prologue_offset = PrologueLength + *offset_ptr;
+ PrologueLength = debug_line_data.getUnsigned(offset_ptr,
+ sizeofPrologueLength());
+ const uint64_t end_prologue_offset = PrologueLength + *offset_ptr;
MinInstLength = debug_line_data.getU8(offset_ptr);
if (Version >= 4)
MaxOpsPerInst = debug_line_data.getU8(offset_ptr);
@@ -114,9 +122,10 @@ bool DWARFDebugLine::Prologue::parse(DataExtractor debug_line_data,
if (*offset_ptr != end_prologue_offset) {
- fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should"
- " have ended at 0x%8.8x but it ended at 0x%8.8x\n",
- prologue_offset, end_prologue_offset, *offset_ptr);
+ fprintf(stderr, "warning: parsing line table prologue at 0x%8.8" PRIx64
+ " should have ended at 0x%8.8" PRIx64
+ " but it ended at 0x%8.8" PRIx64 "\n",
+ prologue_offset, end_prologue_offset, (uint64_t)*offset_ptr);
return false;
return true;
@@ -258,7 +267,7 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
const uint32_t end_offset = debug_line_offset + Prologue.TotalLength +
- sizeof(Prologue.TotalLength);
+ Prologue.sizeofTotalLength();
ParsingState State(this);
@@ -522,10 +531,36 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
return end_offset;
+DWARFDebugLine::LineTable::findRowInSeq(const DWARFDebugLine::Sequence &seq,
+ uint64_t address) const {
+ // An address the sequence does not contain cannot match any of its rows.
+ if (!seq.containsPC(address))
+ return UnknownRowIndex;
+ // The rows of a sequence live in the Rows vector, so the search bounds are
+ // plain iterator offsets from its beginning.
+ const RowIter seq_begin = Rows.begin() + seq.FirstRowIndex;
+ const RowIter seq_end = Rows.begin() + seq.LastRowIndex;
+ // Probe row that carries only the address being searched for.
+ DWARFDebugLine::Row probe;
+ probe.Address = address;
+ const RowIter hit = std::lower_bound(seq_begin, seq_end, probe,
+ DWARFDebugLine::Row::orderByAddress);
+ // Every row sorts below the address: answer with the sequence's final row.
+ if (hit == seq_end)
+ return seq.LastRowIndex - 1;
+ uint32_t row_index = seq.FirstRowIndex + (hit - seq_begin);
+ // The row found does not lie past the address, so it is the answer.
+ if (hit->Address <= address)
+ return row_index;
+ // The row found lies past the address; the preceding row, if the sequence
+ // has one, is the row that covers it.
+ if (hit == seq_begin)
+ return UnknownRowIndex;
+ return row_index - 1;
uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const {
- uint32_t unknown_index = UINT32_MAX;
if (Sequences.empty())
- return unknown_index;
+ return UnknownRowIndex;
// First, find an instruction sequence containing the given address.
DWARFDebugLine::Sequence sequence;
sequence.LowPC = address;
@@ -540,31 +575,10 @@ uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const {
found_seq = *seq_pos;
} else {
if (seq_pos == first_seq)
- return unknown_index;
+ return UnknownRowIndex;
found_seq = *(seq_pos - 1);
- if (!found_seq.containsPC(address))
- return unknown_index;
- // Search for instruction address in the rows describing the sequence.
- // Rows are stored in a vector, so we may use arithmetical operations with
- // iterators.
- DWARFDebugLine::Row row;
- row.Address = address;
- RowIter first_row = Rows.begin() + found_seq.FirstRowIndex;
- RowIter last_row = Rows.begin() + found_seq.LastRowIndex;
- RowIter row_pos = std::lower_bound(first_row, last_row, row,
- DWARFDebugLine::Row::orderByAddress);
- if (row_pos == last_row) {
- return found_seq.LastRowIndex - 1;
- }
- uint32_t index = found_seq.FirstRowIndex + (row_pos - first_row);
- if (row_pos->Address > address) {
- if (row_pos == first_row)
- return unknown_index;
- else
- index--;
- }
- return index;
+ return findRowInSeq(found_seq, address);
bool DWARFDebugLine::LineTable::lookupAddressRange(
@@ -593,45 +607,21 @@ bool DWARFDebugLine::LineTable::lookupAddressRange(
// index we just calculated
while (seq_pos != last_seq && seq_pos->LowPC < end_addr) {
- DWARFDebugLine::Sequence cur_seq = *seq_pos;
- uint32_t first_row_index;
- uint32_t last_row_index;
- if (seq_pos == start_pos) {
- // For the first sequence, we need to find which row in the sequence is the
- // first in our range. Rows are stored in a vector, so we may use
- // arithmetical operations with iterators.
- DWARFDebugLine::Row row;
- row.Address = address;
- RowIter first_row = Rows.begin() + cur_seq.FirstRowIndex;
- RowIter last_row = Rows.begin() + cur_seq.LastRowIndex;
- RowIter row_pos = std::upper_bound(first_row, last_row, row,
- DWARFDebugLine::Row::orderByAddress);
- // The 'row_pos' iterator references the first row that is greater than
- // our start address. Unless that's the first row, we want to start at
- // the row before that.
- first_row_index = cur_seq.FirstRowIndex + (row_pos - first_row);
- if (row_pos != first_row)
- --first_row_index;
- } else
- first_row_index = cur_seq.FirstRowIndex;
- // For the last sequence in our range, we need to figure out the last row in
- // range. For all other sequences we can go to the end of the sequence.
- if (cur_seq.HighPC > end_addr) {
- DWARFDebugLine::Row row;
- row.Address = end_addr;
- RowIter first_row = Rows.begin() + cur_seq.FirstRowIndex;
- RowIter last_row = Rows.begin() + cur_seq.LastRowIndex;
- RowIter row_pos = std::upper_bound(first_row, last_row, row,
- DWARFDebugLine::Row::orderByAddress);
- // The 'row_pos' iterator references the first row that is greater than
- // our end address. The row before that is the last row we want.
- last_row_index = cur_seq.FirstRowIndex + (row_pos - first_row) - 1;
- } else
- // Contrary to what you might expect, DWARFDebugLine::SequenceLastRowIndex
- // isn't a valid index within the current sequence. It's that plus one.
+ const DWARFDebugLine::Sequence &cur_seq = *seq_pos;
+ // For the first sequence, we need to find which row in the sequence is the
+ // first in our range.
+ uint32_t first_row_index = cur_seq.FirstRowIndex;
+ if (seq_pos == start_pos)
+ first_row_index = findRowInSeq(cur_seq, address);
+ // Figure out the last row in the range.
+ uint32_t last_row_index = findRowInSeq(cur_seq, end_addr - 1);
+ if (last_row_index == UnknownRowIndex)
last_row_index = cur_seq.LastRowIndex - 1;
+ assert(first_row_index != UnknownRowIndex);
+ assert(last_row_index != UnknownRowIndex);
for (uint32_t i = first_row_index; i <= last_row_index; ++i) {
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index d7038fd..9e71b10 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -438,7 +438,7 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
if (NumArgs > 2) {
std::vector<std::string> EnvVars;
for (unsigned i = 0; envp[i]; ++i)
- EnvVars.push_back(envp[i]);
+ EnvVars.emplace_back(envp[i]);
// Arg #2 = envp.
GVArgs.push_back(PTOGV(CEnv.reset(Fn->getContext(), this, EnvVars)));
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index ce1ab59..55ab5af 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetOptions.h"
#include <cstring>
using namespace llvm;
@@ -254,11 +255,8 @@ int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F,
unsigned ArgC, const char * const *ArgV,
const char * const *EnvP) {
- std::vector<std::string> ArgVec;
- for (unsigned I = 0; I != ArgC; ++I)
- ArgVec.push_back(ArgV[I]);
+ std::vector<std::string> ArgVec(ArgV, ArgV + ArgC);
return unwrap(EE)->runFunctionAsMain(unwrap<Function>(F), ArgVec, EnvP);
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index ec67019..08d9d6b 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -119,10 +119,9 @@ void IntelJITEventListener::NotifyObjectEmitted(
if (SymType == SymbolRef::ST_Function) {
StringRef Name;
uint64_t Addr;
- uint64_t Size;
if (I->getName(Name)) continue;
if (I->getAddress(Addr)) continue;
- if (I->getSize(Size)) continue;
+ uint64_t Size = I->getSize();
// Record this address in a local vector
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index a26740b..39a8027 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -2079,7 +2079,7 @@ void Interpreter::callFunction(Function *F,
ECStack.back().Caller.arg_size() == ArgVals.size()) &&
"Incorrect number of arguments passed into function call!");
// Make a new stack frame... and fill it in.
- ECStack.push_back(ExecutionContext());
+ ECStack.emplace_back();
ExecutionContext &StackFrame = ECStack.back();
StackFrame.CurFunction = F;
diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
index 8509852..9d29a41 100644
--- a/lib/ExecutionEngine/LLVMBuild.txt
+++ b/lib/ExecutionEngine/LLVMBuild.txt
@@ -22,4 +22,4 @@ subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc
type = Library
name = ExecutionEngine
parent = Libraries
-required_libraries = Core MC Object RuntimeDyld Support
+required_libraries = Core MC Object RuntimeDyld Support Target
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 24a3ec1..6d64d68 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -120,7 +120,7 @@ static std::error_code getOffset(const SymbolRef &Sym, uint64_t &Result) {
if (Address == UnknownAddressOrSize) {
Result = UnknownAddressOrSize;
- return object_error::success;
+ return std::error_code();
const ObjectFile *Obj = Sym.getObject();
@@ -130,12 +130,12 @@ static std::error_code getOffset(const SymbolRef &Sym, uint64_t &Result) {
if (SecI == Obj->section_end()) {
Result = UnknownAddressOrSize;
- return object_error::success;
+ return std::error_code();
uint64_t SectionAddress = SecI->getAddress();
Result = Address - SectionAddress;
- return object_error::success;
+ return std::error_code();
std::pair<unsigned, unsigned>
@@ -149,6 +149,7 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
// Save information about our target
Arch = (Triple::ArchType)Obj.getArch();
IsTargetLittleEndian = Obj.isLittleEndian();
+ setMipsABI(Obj);
// Compute the memory size required to load all sections to be loaded
// and pass this information to the memory manager
@@ -386,8 +387,7 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
uint32_t Flags = I->getFlags();
if (Flags & SymbolRef::SF_Common) {
// Add the common symbols to a list. We'll allocate them all below.
- uint64_t Size = 0;
- Check(I->getSize(Size));
+ uint64_t Size = I->getSize();
CommonSize += Size;
@@ -493,10 +493,8 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
- uint32_t Align = 0;
- uint64_t Size = 0;
- Check(Sym.getAlignment(Align));
- Check(Sym.getSize(Size));
+ uint32_t Align = Sym.getAlignment();
+ uint64_t Size = Sym.getSize();
CommonSize += Align + Size;
@@ -517,11 +515,9 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
// Assign the address of each symbol
for (auto &Sym : SymbolsToAllocate) {
- uint32_t Align;
- uint64_t Size;
+ uint32_t Align = Sym.getAlignment();
StringRef Name;
- Check(Sym.getAlignment(Align));
- Check(Sym.getSize(Size));
+ uint64_t Size = Sym.getSize();
if (Align) {
// This symbol has an alignment requirement.
@@ -689,7 +685,7 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr,
// and stubs for branches Thumb - ARM and ARM - Thumb.
writeBytesUnaligned(0xe51ff004, Addr, 4); // ldr pc,<label>
return Addr + 4;
- } else if (Arch == Triple::mipsel || Arch == Triple::mips) {
+ } else if (IsMipsO32ABI) {
// 0: 3c190000 lui t9,%hi(addr).
// 4: 27390000 addiu t9,t9,%lo(addr).
// 8: 03200008 jr t9.
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 95421b3..b4a34e8 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -157,16 +157,16 @@ OwningBinary<ObjectFile> createELFDebugObject(const ObjectFile &Obj,
std::unique_ptr<ObjectFile> DebugObj;
if (Obj.getBytesInAddress() == 4 && Obj.isLittleEndian()) {
- typedef ELFType<support::little, 2, false> ELF32LE;
+ typedef ELFType<support::little, false> ELF32LE;
DebugObj = createRTDyldELFObject<ELF32LE>(Buffer->getMemBufferRef(), L, ec);
} else if (Obj.getBytesInAddress() == 4 && !Obj.isLittleEndian()) {
- typedef ELFType<support::big, 2, false> ELF32BE;
+ typedef ELFType<support::big, false> ELF32BE;
DebugObj = createRTDyldELFObject<ELF32BE>(Buffer->getMemBufferRef(), L, ec);
} else if (Obj.getBytesInAddress() == 8 && !Obj.isLittleEndian()) {
- typedef ELFType<support::big, 2, true> ELF64BE;
+ typedef ELFType<support::big, true> ELF64BE;
DebugObj = createRTDyldELFObject<ELF64BE>(Buffer->getMemBufferRef(), L, ec);
} else if (Obj.getBytesInAddress() == 8 && Obj.isLittleEndian()) {
- typedef ELFType<support::little, 2, true> ELF64LE;
+ typedef ELFType<support::little, true> ELF64LE;
DebugObj = createRTDyldELFObject<ELF64LE>(Buffer->getMemBufferRef(), L, ec);
} else
llvm_unreachable("Unexpected ELF format");
@@ -477,34 +477,243 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section,
uint64_t Offset, uint32_t Value,
uint32_t Type, int32_t Addend) {
- uint32_t *TargetPtr = (uint32_t *)(Section.Address + Offset);
+ uint8_t *TargetPtr = Section.Address + Offset;
Value += Addend;
- DEBUG(dbgs() << "resolveMipselocation, LocalAddress: "
+ DEBUG(dbgs() << "resolveMIPSRelocation, LocalAddress: "
<< Section.Address + Offset << " FinalAddress: "
<< format("%p", Section.LoadAddress + Offset) << " Value: "
<< format("%x", Value) << " Type: " << format("%x", Type)
<< " Addend: " << format("%x", Addend) << "\n");
+ uint32_t Insn = readBytesUnaligned(TargetPtr, 4);
switch (Type) {
llvm_unreachable("Not implemented relocation type!");
case ELF::R_MIPS_32:
- *TargetPtr = Value;
+ writeBytesUnaligned(Value, TargetPtr, 4);
case ELF::R_MIPS_26:
- *TargetPtr = ((*TargetPtr) & 0xfc000000) | ((Value & 0x0fffffff) >> 2);
+ Insn &= 0xfc000000;
+ Insn |= (Value & 0x0fffffff) >> 2;
+ writeBytesUnaligned(Insn, TargetPtr, 4);
case ELF::R_MIPS_HI16:
// Get the higher 16-bits. Also add 1 if bit 15 is 1.
- *TargetPtr =
- ((*TargetPtr) & 0xffff0000) | (((Value + 0x8000) >> 16) & 0xffff);
+ Insn &= 0xffff0000;
+ Insn |= ((Value + 0x8000) >> 16) & 0xffff;
+ writeBytesUnaligned(Insn, TargetPtr, 4);
case ELF::R_MIPS_LO16:
- *TargetPtr = ((*TargetPtr) & 0xffff0000) | (Value & 0xffff);
+ Insn &= 0xffff0000;
+ Insn |= Value & 0xffff;
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ case ELF::R_MIPS_PC32:
+ uint32_t FinalAddress = (Section.LoadAddress + Offset);
+ writeBytesUnaligned(Value + Addend - FinalAddress, (uint8_t *)TargetPtr, 4);
+ break;
+ }
+/// Record which MIPS ABI the object being loaded uses by setting the
+/// IsMipsO32ABI / IsMipsN64ABI flags. Both flags are cleared when the target
+/// architecture is not a MIPS one.
+void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) {
+ // Rule out an unknown architecture before building a StringRef from the
+ // prefix. NOTE(review): Triple::getArchTypePrefix() is believed to return a
+ // null pointer for UnknownArch, which StringRef cannot be built from.
+ if (Arch == Triple::UnknownArch ||
+ !StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) {
+ IsMipsO32ABI = false;
+ IsMipsN64ABI = false;
+ return;
+ }
+ // Zero-initialize so the flag tests below stay well defined even if
+ // getPlatformFlags() leaves its output untouched.
+ unsigned AbiVariant = 0;
+ Obj.getPlatformFlags(AbiVariant);
+ IsMipsO32ABI = AbiVariant & ELF::EF_MIPS_ABI_O32;
+ IsMipsN64ABI = Obj.getFileFormatName().equals("ELF64-mips");
+ if (AbiVariant & ELF::EF_MIPS_ABI2)
+ llvm_unreachable("Mips N32 ABI is not supported yet");
+/// Resolve a MIPS64 relocation whose Type field packs up to three relocation
+/// types, one per byte. The types are evaluated in order, each consuming the
+/// previous result as its addend, and the final value is then written to the
+/// target using the last type that was evaluated.
+void RuntimeDyldELF::resolveMIPS64Relocation(const SectionEntry &Section,
+ uint64_t Offset, uint64_t Value,
+ uint32_t Type, int64_t Addend,
+ uint64_t SymOffset,
+ SID SectionID) {
+ uint32_t r_type = Type & 0xff;
+ uint32_t r_type2 = (Type >> 8) & 0xff;
+ uint32_t r_type3 = (Type >> 16) & 0xff;
+ // RelType is used to keep information for which relocation type we are
+ // applying relocation.
+ uint32_t RelType = r_type;
+ int64_t CalculatedValue = evaluateMIPS64Relocation(Section, Offset, Value,
+ RelType, Addend,
+ SymOffset, SectionID);
+ // The second and third types take no symbol value of their own (0 is
+ // passed); they operate on the value calculated so far.
+ if (r_type2 != ELF::R_MIPS_NONE) {
+ RelType = r_type2;
+ CalculatedValue = evaluateMIPS64Relocation(Section, Offset, 0, RelType,
+ CalculatedValue, SymOffset,
+ SectionID);
+ }
+ if (r_type3 != ELF::R_MIPS_NONE) {
+ RelType = r_type3;
+ CalculatedValue = evaluateMIPS64Relocation(Section, Offset, 0, RelType,
+ CalculatedValue, SymOffset,
+ SectionID);
+ }
+ // Always write the result back, whether or not a third type was present.
+ applyMIPS64Relocation(Section.Address + Offset, CalculatedValue, RelType);
+RuntimeDyldELF::evaluateMIPS64Relocation(const SectionEntry &Section,
+ uint64_t Offset, uint64_t Value,
+ uint32_t Type, int64_t Addend,
+ uint64_t SymOffset, SID SectionID) {
+ // Computes the value for a single MIPS64 relocation type without patching
+ // the target word; applyMIPS64Relocation() does the write. The GOT-based
+ // types also fill in the symbol's GOT slot as a side effect.
+ // SymOffset is 64 bits wide, so it is printed with %llx like Value.
+ DEBUG(dbgs() << "evaluateMIPS64Relocation, LocalAddress: 0x"
+ << format("%llx", Section.Address + Offset)
+ << " FinalAddress: 0x"
+ << format("%llx", Section.LoadAddress + Offset)
+ << " Value: 0x" << format("%llx", Value) << " Type: 0x"
+ << format("%x", Type) << " Addend: 0x" << format("%llx", Addend)
+ << " SymOffset: " << format("%llx", SymOffset)
+ << "\n");
+ switch (Type) {
+ default:
+ llvm_unreachable("Not implemented relocation type!");
+ break;
+ case ELF::R_MIPS_JALR:
+ case ELF::R_MIPS_NONE:
+ break;
+ case ELF::R_MIPS_32:
+ case ELF::R_MIPS_64:
+ return Value + Addend;
+ case ELF::R_MIPS_26:
+ return ((Value + Addend) >> 2) & 0x3ffffff;
+ case ELF::R_MIPS_GPREL16: {
+ uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]);
+ return Value + Addend - (GOTAddr + 0x7ff0);
+ }
+ case ELF::R_MIPS_SUB:
+ return Value - Addend;
+ case ELF::R_MIPS_HI16:
+ // Get the higher 16-bits. Also add 1 if bit 15 is 1.
+ return ((Value + Addend + 0x8000) >> 16) & 0xffff;
+ case ELF::R_MIPS_LO16:
+ return (Value + Addend) & 0xffff;
+ // R_MIPS_GOT_DISP gets a GOT slot allocated in processRelocationRef just
+ // like CALL16 and GOT_PAGE, so it is evaluated through the same path.
+ case ELF::R_MIPS_CALL16:
+ case ELF::R_MIPS_GOT_DISP:
+ case ELF::R_MIPS_GOT_PAGE: {
+ uint8_t *LocalGOTAddr =
+ getSectionAddress(SectionToGOTMap[SectionID]) + SymOffset;
+ uint64_t GOTEntry = readBytesUnaligned(LocalGOTAddr, 8);
+ Value += Addend;
+ // A page entry holds the address rounded to the nearest 64K boundary.
+ if (Type == ELF::R_MIPS_GOT_PAGE)
+ Value = (Value + 0x8000) & ~0xffff;
+ // Fill the slot on first use; later uses must agree with its contents.
+ if (GOTEntry)
+ assert(GOTEntry == Value &&
+ "GOT entry has two different addresses.");
+ else
+ writeBytesUnaligned(Value, LocalGOTAddr, 8);
+ return (SymOffset - 0x7ff0) & 0xffff;
+ }
+ case ELF::R_MIPS_GOT_OFST: {
+ int64_t page = (Value + Addend + 0x8000) & ~0xffff;
+ return (Value + Addend - page) & 0xffff;
+ }
+ case ELF::R_MIPS_GPREL32: {
+ uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]);
+ return Value + Addend - (GOTAddr + 0x7ff0);
+ }
+ case ELF::R_MIPS_PC16: {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ return ((Value + Addend - FinalAddress - 4) >> 2) & 0xffff;
+ }
+ case ELF::R_MIPS_PC32: {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ return Value + Addend - FinalAddress;
+ }
+ case ELF::R_MIPS_PC18_S3: {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ return ((Value + Addend - ((FinalAddress | 7) ^ 7)) >> 3) & 0x3ffff;
+ }
+ case ELF::R_MIPS_PC19_S2: {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ return ((Value + Addend - FinalAddress) >> 2) & 0x7ffff;
+ }
+ case ELF::R_MIPS_PC21_S2: {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ return ((Value + Addend - FinalAddress) >> 2) & 0x1fffff;
+ }
+ case ELF::R_MIPS_PC26_S2: {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ return ((Value + Addend - FinalAddress) >> 2) & 0x3ffffff;
+ }
+ case ELF::R_MIPS_PCHI16: {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ return ((Value + Addend - FinalAddress + 0x8000) >> 16) & 0xffff;
+ }
+ case ELF::R_MIPS_PCLO16: {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ return (Value + Addend - FinalAddress) & 0xffff;
+ }
+ }
+ return 0;
+/// Write an already evaluated MIPS64 relocation value to TargetPtr. Type
+/// selects how much of the target is replaced: a whole 32- or 64-bit word, or
+/// only the immediate field of the 32-bit instruction found there.
+/// Types without a case here leave the target unchanged.
+void RuntimeDyldELF::applyMIPS64Relocation(uint8_t *TargetPtr,
+ int64_t CalculatedValue,
+ uint32_t Type) {
+ // Existing instruction word; its bits outside the field are preserved.
+ uint32_t Insn = readBytesUnaligned(TargetPtr, 4);
+ switch (Type) {
+ default:
+ break;
+ case ELF::R_MIPS_32:
+ case ELF::R_MIPS_GPREL32:
+ case ELF::R_MIPS_PC32:
+ writeBytesUnaligned(CalculatedValue & 0xffffffff, TargetPtr, 4);
+ break;
+ case ELF::R_MIPS_64:
+ case ELF::R_MIPS_SUB:
+ writeBytesUnaligned(CalculatedValue, TargetPtr, 8);
+ break;
+ case ELF::R_MIPS_26:
+ case ELF::R_MIPS_PC26_S2:
+ Insn = (Insn & 0xfc000000) | CalculatedValue;
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ case ELF::R_MIPS_GPREL16:
+ // Not masked when evaluated, so truncate to the 16-bit field here.
+ Insn = (Insn & 0xffff0000) | (CalculatedValue & 0xffff);
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ // 16-bit immediates that evaluateMIPS64Relocation() already masked. The
+ // GOT_DISP / GOT_PAGE / GOT_OFST values belong here too; without these
+ // labels they would hit the default case and never be written.
+ case ELF::R_MIPS_HI16:
+ case ELF::R_MIPS_LO16:
+ case ELF::R_MIPS_PCHI16:
+ case ELF::R_MIPS_PCLO16:
+ case ELF::R_MIPS_PC16:
+ case ELF::R_MIPS_CALL16:
+ case ELF::R_MIPS_GOT_DISP:
+ case ELF::R_MIPS_GOT_PAGE:
+ case ELF::R_MIPS_GOT_OFST:
+ Insn = (Insn & 0xffff0000) | CalculatedValue;
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ case ELF::R_MIPS_PC18_S3:
+ Insn = (Insn & 0xfffc0000) | CalculatedValue;
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ case ELF::R_MIPS_PC19_S2:
+ Insn = (Insn & 0xfff80000) | CalculatedValue;
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ case ELF::R_MIPS_PC21_S2:
+ Insn = (Insn & 0xffe00000) | CalculatedValue;
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ }
// Return the .TOC. section and offset.
@@ -521,17 +730,15 @@ void RuntimeDyldELF::findPPC64TOCSection(const ObjectFile &Obj,
// The TOC consists of sections .got, .toc, .tocbss, .plt in that
// order. The TOC starts where the first of these sections starts.
- for (section_iterator si = Obj.section_begin(), se = Obj.section_end();
- si != se; ++si) {
+ for (auto &Section: Obj.sections()) {
StringRef SectionName;
- check(si->getName(SectionName));
+ check(Section.getName(SectionName));
if (SectionName == ".got"
|| SectionName == ".toc"
|| SectionName == ".tocbss"
|| SectionName == ".plt") {
- Rel.SectionID = findOrEmitSection(Obj, *si, false, LocalSections);
+ Rel.SectionID = findOrEmitSection(Obj, Section, false, LocalSections);
@@ -784,13 +991,13 @@ void RuntimeDyldELF::resolveRelocation(const RelocationEntry &RE,
uint64_t Value) {
const SectionEntry &Section = Sections[RE.SectionID];
return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend,
- RE.SymOffset);
+ RE.SymOffset, RE.SectionID);
void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
uint64_t Offset, uint64_t Value,
uint32_t Type, int64_t Addend,
- uint64_t SymOffset) {
+ uint64_t SymOffset, SID SectionID) {
switch (Arch) {
case Triple::x86_64:
resolveX86_64Relocation(Section, Offset, Value, Type, Addend, SymOffset);
@@ -812,8 +1019,16 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
case Triple::mips: // Fall through.
case Triple::mipsel:
- resolveMIPSRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL),
- Type, (uint32_t)(Addend & 0xffffffffL));
+ case Triple::mips64:
+ case Triple::mips64el:
+ if (IsMipsO32ABI)
+ resolveMIPSRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL),
+ Type, (uint32_t)(Addend & 0xffffffffL));
+ else if (IsMipsN64ABI)
+ resolveMIPS64Relocation(Section, Offset, Value, Type, Addend, SymOffset,
+ SectionID);
+ else
+ llvm_unreachable("Mips ABI not handled");
case Triple::ppc64: // Fall through.
case Triple::ppc64le:
@@ -999,8 +1214,10 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
processSimpleRelocation(SectionID, Offset, RelType, Value);
- } else if ((Arch == Triple::mipsel || Arch == Triple::mips)) {
- uint32_t *Placeholder = reinterpret_cast<uint32_t*>(computePlaceholderAddress(SectionID, Offset));
+ } else if (IsMipsO32ABI) {
+ uint8_t *Placeholder = reinterpret_cast<uint8_t *>(
+ computePlaceholderAddress(SectionID, Offset));
+ uint32_t Opcode = readBytesUnaligned(Placeholder, 4);
if (RelType == ELF::R_MIPS_26) {
// This is an Mips branch relocation, need to use a stub function.
DEBUG(dbgs() << "\t\tThis is a Mips branch relocation.");
@@ -1009,7 +1226,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
// Extract the addend from the instruction.
// We shift up by two since the Value will be down shifted again
// when applying the relocation.
- uint32_t Addend = ((*Placeholder) & 0x03ffffff) << 2;
+ uint32_t Addend = (Opcode & 0x03ffffff) << 2;
Value.Addend += Addend;
@@ -1047,13 +1264,30 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
} else {
if (RelType == ELF::R_MIPS_HI16)
- Value.Addend += ((*Placeholder) & 0x0000ffff) << 16;
+ Value.Addend += (Opcode & 0x0000ffff) << 16;
else if (RelType == ELF::R_MIPS_LO16)
- Value.Addend += ((*Placeholder) & 0x0000ffff);
+ Value.Addend += (Opcode & 0x0000ffff);
else if (RelType == ELF::R_MIPS_32)
- Value.Addend += *Placeholder;
+ Value.Addend += Opcode;
processSimpleRelocation(SectionID, Offset, RelType, Value);
+ } else if (IsMipsN64ABI) {
+ uint32_t r_type = RelType & 0xff;
+ RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
+ if (r_type == ELF::R_MIPS_CALL16 || r_type == ELF::R_MIPS_GOT_PAGE
+ || r_type == ELF::R_MIPS_GOT_DISP) {
+ StringMap<uint64_t>::iterator i = GOTSymbolOffsets.find(TargetName);
+ if (i != GOTSymbolOffsets.end())
+ RE.SymOffset = i->second;
+ else {
+ RE.SymOffset = allocateGOTEntries(SectionID, 1);
+ GOTSymbolOffsets[TargetName] = RE.SymOffset;
+ }
+ }
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
} else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) {
if (RelType == ELF::R_PPC64_REL24) {
// Determine ABI variant in use for this object.
@@ -1356,9 +1590,18 @@ size_t RuntimeDyldELF::getGOTEntrySize() {
case Triple::x86:
case Triple::arm:
case Triple::thumb:
+ Result = sizeof(uint32_t);
+ break;
case Triple::mips:
case Triple::mipsel:
- Result = sizeof(uint32_t);
+ case Triple::mips64:
+ case Triple::mips64el:
+ if (IsMipsO32ABI)
+ Result = sizeof(uint32_t);
+ else if (IsMipsN64ABI)
+ Result = sizeof(uint64_t);
+ else
+ llvm_unreachable("Mips ABI not handled");
llvm_unreachable("Unsupported CPU type!");
@@ -1413,6 +1656,20 @@ void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
// For now, initialize all GOT entries to zero. We'll fill them in as
// needed when GOT-based relocations are applied.
memset(Addr, 0, TotalSize);
+ if (IsMipsN64ABI) {
+ // To correctly resolve Mips GOT relocations, we need a mapping from
+ // object's sections to GOTs.
+ for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end();
+ SI != SE; ++SI) {
+ if (SI->relocation_begin() != SI->relocation_end()) {
+ section_iterator RelocatedSection = SI->getRelocatedSection();
+ ObjSectionToIDMap::iterator i = SectionMap.find(*RelocatedSection);
+ assert (i != SectionMap.end());
+ SectionToGOTMap[i->second] = GOTSectionID;
+ }
+ }
+ GOTSymbolOffsets.clear();
+ }
// Look for and record the EH frame section.
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 9a4a863..3a377a2 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -25,7 +25,7 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
void resolveRelocation(const SectionEntry &Section, uint64_t Offset,
uint64_t Value, uint32_t Type, int64_t Addend,
- uint64_t SymOffset = 0);
+ uint64_t SymOffset = 0, SID SectionID = 0);
void resolveX86_64Relocation(const SectionEntry &Section, uint64_t Offset,
uint64_t Value, uint32_t Type, int64_t Addend,
@@ -49,12 +49,24 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
void resolveSystemZRelocation(const SectionEntry &Section, uint64_t Offset,
uint64_t Value, uint32_t Type, int64_t Addend);
+ void resolveMIPS64Relocation(const SectionEntry &Section, uint64_t Offset,
+ uint64_t Value, uint32_t Type, int64_t Addend,
+ uint64_t SymOffset, SID SectionID);
+ int64_t evaluateMIPS64Relocation(const SectionEntry &Section,
+ uint64_t Offset, uint64_t Value,
+ uint32_t Type, int64_t Addend,
+ uint64_t SymOffset, SID SectionID);
+ void applyMIPS64Relocation(uint8_t *TargetPtr, int64_t CalculatedValue,
+ uint32_t Type);
unsigned getMaxStubSize() override {
if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be)
return 20; // movz; movk; movk; movk; br
if (Arch == Triple::arm || Arch == Triple::thumb)
return 8; // 32-bit instruction and 32-bit address
- else if (Arch == Triple::mipsel || Arch == Triple::mips)
+ else if (IsMipsO32ABI)
return 16;
else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le)
return 44;
@@ -73,6 +85,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
return 1;
+ void setMipsABI(const ObjectFile &Obj) override;
void findPPC64TOCSection(const ObjectFile &Obj,
ObjSectionToIDMap &LocalSections,
RelocationValueRef &Rel);
@@ -114,6 +128,13 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
// that consume more than one slot)
unsigned CurrentGOTIndex;
+ // A map from section to a GOT section that has entries for section's GOT
+ // relocations. (Mips64 specific)
+ DenseMap<SID, SID> SectionToGOTMap;
+ // A map to avoid duplicate got entries (Mips64 specific)
+ StringMap<uint64_t> GOTSymbolOffsets;
// When a module is loaded we save the SectionID of the EH frame section
// in a table until we receive a request to register all unregistered
// EH frame sections with the memory manager.
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 90e61a5..e085a92 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -236,6 +236,8 @@ protected:
Triple::ArchType Arch;
bool IsTargetLittleEndian;
+ bool IsMipsO32ABI;
+ bool IsMipsN64ABI;
// True if all sections should be passed to the memory manager, false if only
// sections containing relocations should be. Defaults to 'false'.
@@ -303,6 +305,11 @@ protected:
*(Addr + 7) = Value & 0xFF;
+ virtual void setMipsABI(const ObjectFile &Obj) {
+ // Base implementation: Obj is not inspected and both MIPS ABI flags are
+ // cleared. RuntimeDyldELF overrides this.
+ IsMipsO32ABI = IsMipsN64ABI = false;
+ }
/// Endian-aware read Read the least significant Size bytes from Src.
uint64_t readBytesUnaligned(uint8_t *Src, unsigned Size) const;
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
index 053f90c..dd454ae 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
@@ -167,20 +167,19 @@ private:
uint32_t SectionBID =
findOrEmitSection(Obj, SectionB, IsCode, ObjSectionToID);
- if (Addend != AddrA - AddrB)
- Error("Unexpected SECTDIFF relocation addend.");
+ // Compute the addend 'C' from the original expression 'A - B + C'.
+ Addend -= AddrA - AddrB;
DEBUG(dbgs() << "Found SECTDIFF: AddrA: " << AddrA << ", AddrB: " << AddrB
<< ", Addend: " << Addend << ", SectionA ID: " << SectionAID
<< ", SectionAOffset: " << SectionAOffset
<< ", SectionB ID: " << SectionBID
<< ", SectionBOffset: " << SectionBOffset << "\n");
- RelocationEntry R(SectionID, Offset, RelocType, 0, SectionAID,
- SectionAOffset, SectionBID, SectionBOffset, IsPCRel,
- Size);
+ RelocationEntry R(SectionID, Offset, RelocType, Addend, SectionAID,
+ SectionAOffset, SectionBID, SectionBOffset,
+ IsPCRel, Size);
addRelocationForSection(R, SectionAID);
- addRelocationForSection(R, SectionBID);
return ++RelI;
diff --git a/lib/Fuzzer/FuzzerInterface.h b/lib/Fuzzer/FuzzerInterface.h
index 8cf9962..3fd807a 100644
--- a/lib/Fuzzer/FuzzerInterface.h
+++ b/lib/Fuzzer/FuzzerInterface.h
@@ -69,12 +69,12 @@ class UserSuppliedFuzzer {
/// Executes the target function on 'Size' bytes of 'Data'.
virtual void TargetFunction(const uint8_t *Data, size_t Size) = 0;
/// Mutates 'Size' bytes of data in 'Data' inplace into up to 'MaxSize' bytes,
- /// returns the new size of the data.
+ /// returns the new size of the data, which should be positive.
virtual size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize) {
return BasicMutate(Data, Size, MaxSize);
/// Crosses 'Data1' and 'Data2', writes up to 'MaxOutSize' bytes into Out,
- /// returns the number of bytes written.
+ /// returns the number of bytes written, which should be positive.
virtual size_t CrossOver(const uint8_t *Data1, size_t Size1,
const uint8_t *Data2, size_t Size2,
uint8_t *Out, size_t MaxOutSize) {
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index 4c8b247..9ef4758 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -289,7 +289,9 @@ void Fuzzer::MutateAndTestOne(Unit *U) {
size_t Size = U->size();
size_t NewSize = USF.Mutate(U->data(), Size, U->size());
- assert(NewSize > 0 && NewSize <= (size_t)Options.MaxLen);
+ assert(NewSize > 0 && "Mutator returned empty unit");
+ assert(NewSize <= (size_t)Options.MaxLen &&
+ "Mutator return overisized unit");
size_t NumTraceBasedMutations = StopTraceRecording();
@@ -317,7 +319,9 @@ void Fuzzer::Loop(size_t NumIterations) {
size_t NewSize = USF.CrossOver(
Corpus[J1].data(), Corpus[J1].size(), Corpus[J2].data(),
Corpus[J2].size(), CurrentUnit.data(), CurrentUnit.size());
- assert(NewSize > 0 && NewSize <= (size_t)Options.MaxLen);
+ assert(NewSize > 0 && "CrossOver returned empty unit");
+ assert(NewSize <= (size_t)Options.MaxLen &&
+ "CrossOver return overisized unit");
diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp
index ddb0764..b2e1e95 100644
--- a/lib/Fuzzer/FuzzerTraceState.cpp
+++ b/lib/Fuzzer/FuzzerTraceState.cpp
@@ -332,7 +332,7 @@ extern "C" {
void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
uint64_t Arg2, dfsan_label L0,
dfsan_label L1, dfsan_label L2) {
- assert(TS);
+ if (!TS) return;
assert(L0 == 0);
uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
uint64_t CmpSize = (SizeAndType >> 32) / 8;
@@ -343,7 +343,7 @@ void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
size_t n, dfsan_label s1_label,
dfsan_label s2_label, dfsan_label n_label) {
- assert(TS);
+ if (!TS) return;
uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
uint64_t S1 = 0, S2 = 0;
// Simplification: handle only first 8 bytes.
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index 1089cb5..0744fdf 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -2140,27 +2140,20 @@ void AssemblyWriter::printModule(const Module *M) {
Out << "target triple = \"" << M->getTargetTriple() << "\"\n";
if (!M->getModuleInlineAsm().empty()) {
- // Split the string into lines, to make it easier to read the .ll file.
- std::string Asm = M->getModuleInlineAsm();
- size_t CurPos = 0;
- size_t NewLine = Asm.find_first_of('\n', CurPos);
Out << '\n';
- while (NewLine != std::string::npos) {
+ // Split the string into lines, to make it easier to read the .ll file.
+ StringRef Asm = M->getModuleInlineAsm();
+ do {
+ StringRef Front;
+ std::tie(Front, Asm) = Asm.split('\n');
// We found a newline, print the portion of the asm string from the
// last newline up to this newline.
Out << "module asm \"";
- PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine),
- Out);
- Out << "\"\n";
- CurPos = NewLine+1;
- NewLine = Asm.find_first_of('\n', CurPos);
- }
- std::string rest(Asm.begin()+CurPos, Asm.end());
- if (!rest.empty()) {
- Out << "module asm \"";
- PrintEscapedString(rest, Out);
+ PrintEscapedString(Front, Out);
Out << "\"\n";
- }
+ } while (!Asm.empty());
@@ -2215,15 +2208,13 @@ void AssemblyWriter::printModule(const Module *M) {
-void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
- Out << '!';
- StringRef Name = NMD->getName();
+static void printMetadataIdentifier(StringRef Name,
+ formatted_raw_ostream &Out) {
if (Name.empty()) {
Out << "<empty name> ";
} else {
- if (isalpha(static_cast<unsigned char>(Name[0])) ||
- Name[0] == '-' || Name[0] == '$' ||
- Name[0] == '.' || Name[0] == '_')
+ if (isalpha(static_cast<unsigned char>(Name[0])) || Name[0] == '-' ||
+ Name[0] == '$' || Name[0] == '.' || Name[0] == '_')
Out << Name[0];
Out << '\\' << hexdigit(Name[0] >> 4) << hexdigit(Name[0] & 0x0F);
@@ -2236,9 +2227,15 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
+ Out << '!';
+ printMetadataIdentifier(NMD->getName(), Out);
Out << " = !{";
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- if (i) Out << ", ";
+ if (i)
+ Out << ", ";
int Slot = Machine.getMetadataSlot(NMD->getOperand(i));
if (Slot == -1)
Out << "<badref>";
@@ -2248,7 +2245,6 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
Out << "}\n";
static void PrintLinkage(GlobalValue::LinkageTypes LT,
formatted_raw_ostream &Out) {
switch (LT) {
@@ -2268,7 +2264,6 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT,
static void PrintVisibility(GlobalValue::VisibilityTypes Vis,
formatted_raw_ostream &Out) {
switch (Vis) {
@@ -3008,9 +3003,10 @@ void AssemblyWriter::printMetadataAttachments(
for (const auto &I : MDs) {
unsigned Kind = I.first;
Out << Separator;
- if (Kind < MDNames.size())
- Out << "!" << MDNames[Kind];
- else
+ if (Kind < MDNames.size()) {
+ Out << "!";
+ printMetadataIdentifier(MDNames[Kind], Out);
+ } else
Out << "!<unknown kind #" << Kind << ">";
Out << ' ';
WriteAsOperandInternal(Out, I.second, &TypePrinter, &Machine, TheModule);
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index 3f64c43..2efc612 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -2165,9 +2165,9 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C,
// factored out into preceding dimensions.
bool Unknown = false;
SmallVector<Constant *, 8> NewIdxs;
- Type *Ty = PointeeTy;
- Type *Prev = C->getType();
- for (unsigned i = 1, e = Idxs.size(); i != e;
+ Type *Ty = C->getType();
+ Type *Prev = nullptr;
+ for (unsigned i = 0, e = Idxs.size(); i != e;
Prev = Ty, Ty = cast<CompositeType>(Ty)->getTypeAtIndex(Idxs[i]), ++i) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Idxs[i])) {
if (isa<ArrayType>(Ty) || isa<VectorType>(Ty))
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 9557cda..d476434 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -461,6 +461,11 @@ void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) {
*Dest++ = wrap(*I);
+LLVMTypeRef LLVMStructGetTypeAtIndex(LLVMTypeRef StructTy, unsigned i) {
+ StructType *Ty = unwrap<StructType>(StructTy);
+ return wrap(Ty->getTypeAtIndex(i));
LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) {
return unwrap<StructType>(StructTy)->isPacked();
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 1e9d9a5..1478bff 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -1249,7 +1249,8 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
OperandTraits<GetElementPtrInst>::op_end(this) -
- SourceElementType(GEPI.SourceElementType) {
+ SourceElementType(GEPI.SourceElementType),
+ ResultElementType(GEPI.ResultElementType) {
std::copy(GEPI.op_begin(), GEPI.op_end(), op_begin());
SubclassOptionalData = GEPI.SubclassOptionalData;
@@ -2120,7 +2121,7 @@ unsigned CastInst::isEliminableCastPair(
{ 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // FPToSI |
{ 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // UIToFP +- firstOp
{ 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // SIToFP |
- { 99,99,99, 0, 0,99,99, 1, 0,99,99, 4, 0}, // FPTrunc |
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // FPTrunc |
{ 99,99,99, 2, 2,99,99,10, 2,99,99, 4, 0}, // FPExt |
{ 1, 0, 0,99,99, 0, 0,99,99,99, 7, 3, 0}, // PtrToInt |
{ 99,99,99,99,99,99,99,99,99,11,99,15, 0}, // IntToPtr |
diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp
index 44c4532..7bcd829 100644
--- a/lib/IR/LLVMContext.cpp
+++ b/lib/IR/LLVMContext.cpp
@@ -240,15 +240,12 @@ void LLVMContext::emitError(unsigned LocCookie, const Twine &ErrorStr) {
// Metadata Kind Uniquing
-/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
+/// Return a unique non-zero ID for the specified metadata kind.
unsigned LLVMContext::getMDKindID(StringRef Name) const {
- assert(!std::isdigit(Name.front()) &&
- "Named metadata may not start with a digit");
// If this is new, assign it its ID.
- return pImpl->CustomMDKindNames.insert(std::make_pair(
- Name,
- pImpl->CustomMDKindNames.size()))
+ return pImpl->CustomMDKindNames.insert(
+ std::make_pair(
+ Name, pImpl->CustomMDKindNames.size()))
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index f81db60..3a57336 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -922,6 +922,8 @@ public:
DenseMap<Value *, ValueAsMetadata *> ValuesAsMetadata;
DenseMap<Metadata *, MetadataAsValue *> MetadataAsValues;
+ DenseMap<const Value*, ValueName*> ValueNames;
#include "llvm/IR/Metadata.def"
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp
index bbec642..27d98a2 100644
--- a/lib/IR/LegacyPassManager.cpp
+++ b/lib/IR/LegacyPassManager.cpp
@@ -293,10 +293,8 @@ public:
// Delete on the fly managers.
~MPPassManager() override {
- for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
- I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
- I != E; ++I) {
- FunctionPassManagerImpl *FPP = I->second;
+ for (auto &OnTheFlyManager : OnTheFlyManagers) {
+ FunctionPassManagerImpl *FPP = OnTheFlyManager.second;
delete FPP;
@@ -465,9 +463,8 @@ public:
~TimingInfo() {
// Delete all of the timers, which accumulate their info into the
// TimerGroup.
- for (DenseMap<Pass*, Timer*>::iterator I = TimingData.begin(),
- E = TimingData.end(); I != E; ++I)
- delete I->second;
+ for (auto &I : TimingData)
+ delete I.second;
// TimerGroup is deleted next, printing the report.
@@ -510,9 +507,7 @@ PMTopLevelManager::setLastUser(ArrayRef<Pass*> AnalysisPasses, Pass *P) {
if (P->getResolver())
PDepth = P->getResolver()->getPMDataManager().getDepth();
- for (SmallVectorImpl<Pass *>::const_iterator I = AnalysisPasses.begin(),
- E = AnalysisPasses.end(); I != E; ++I) {
- Pass *AP = *I;
+ for (Pass *AP : AnalysisPasses) {
LastUser[AP] = P;
if (P == AP)
@@ -693,22 +688,19 @@ void PMTopLevelManager::schedulePass(Pass *P) {
Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
// Check pass managers
- for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
- E = PassManagers.end(); I != E; ++I)
- if (Pass *P = (*I)->findAnalysisPass(AID, false))
+ for (PMDataManager *PassManager : PassManagers)
+ if (Pass *P = PassManager->findAnalysisPass(AID, false))
return P;
// Check other pass managers
- for (SmallVectorImpl<PMDataManager *>::iterator
- I = IndirectPassManagers.begin(),
- E = IndirectPassManagers.end(); I != E; ++I)
- if (Pass *P = (*I)->findAnalysisPass(AID, false))
+ for (PMDataManager *IndirectPassManager : IndirectPassManagers)
+ if (Pass *P = IndirectPassManager->findAnalysisPass(AID, false))
return P;
// Check the immutable passes. Iterate in reverse order so that we find
// the most recently registered passes first.
- for (SmallVectorImpl<ImmutablePass *>::reverse_iterator I =
- ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; ++I) {
+ for (auto I = ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E;
+ ++I) {
AnalysisID PI = (*I)->getPassID();
if (PI == AID)
return *I;
@@ -718,11 +710,9 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
assert(PassInf && "Expected all immutable passes to be initialized");
const std::vector<const PassInfo*> &ImmPI =
- for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
- EE = ImmPI.end(); II != EE; ++II) {
- if ((*II)->getTypeInfo() == AID)
+ for (const PassInfo *PI : ImmPI)
+ if (PI->getTypeInfo() == AID)
return *I;
- }
return nullptr;
@@ -754,9 +744,8 @@ void PMTopLevelManager::dumpPasses() const {
// (sometimes indirectly), but there's no inheritance relationship
// between PMDataManager and Pass, so we have to getAsPass to get
// from a PMDataManager* to a Pass*.
- for (SmallVectorImpl<PMDataManager *>::const_iterator I =
- PassManagers.begin(), E = PassManagers.end(); I != E; ++I)
- (*I)->getAsPass()->dumpPassStructure(1);
+ for (PMDataManager *Manager : PassManagers)
+ Manager->getAsPass()->dumpPassStructure(1);
void PMTopLevelManager::dumpArguments() const {
@@ -1426,11 +1415,8 @@ bool FunctionPassManagerImpl::doInitialization(Module &M) {
- SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
- for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
- E = IPV.end(); I != E; ++I) {
- Changed |= (*I)->doInitialization(M);
- }
+ for (ImmutablePass *ImPass : getImmutablePasses())
+ Changed |= ImPass->doInitialization(M);
for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
Changed |= getContainedManager(Index)->doInitialization(M);
@@ -1444,11 +1430,8 @@ bool FunctionPassManagerImpl::doFinalization(Module &M) {
for (int Index = getNumContainedManagers() - 1; Index >= 0; --Index)
Changed |= getContainedManager(Index)->doFinalization(M);
- SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
- for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
- E = IPV.end(); I != E; ++I) {
- Changed |= (*I)->doFinalization(M);
- }
+ for (ImmutablePass *ImPass : getImmutablePasses())
+ Changed |= ImPass->doFinalization(M);
return Changed;
@@ -1553,8 +1536,8 @@ bool FPPassManager::runOnFunction(Function &F) {
bool FPPassManager::runOnModule(Module &M) {
bool Changed = false;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- Changed |= runOnFunction(*I);
+ for (Function &F : M)
+ Changed |= runOnFunction(F);
return Changed;
@@ -1588,10 +1571,8 @@ MPPassManager::runOnModule(Module &M) {
bool Changed = false;
// Initialize on-the-fly passes
- for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
- I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
- I != E; ++I) {
- FunctionPassManagerImpl *FPP = I->second;
+ for (auto &OnTheFlyManager : OnTheFlyManagers) {
+ FunctionPassManagerImpl *FPP = OnTheFlyManager.second;
Changed |= FPP->doInitialization(M);
@@ -1632,10 +1613,8 @@ MPPassManager::runOnModule(Module &M) {
Changed |= getContainedPass(Index)->doFinalization(M);
// Finalize on-the-fly passes
- for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
- I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
- I != E; ++I) {
- FunctionPassManagerImpl *FPP = I->second;
+ for (auto &OnTheFlyManager : OnTheFlyManagers) {
+ FunctionPassManagerImpl *FPP = OnTheFlyManager.second;
// We don't know when is the last time an on-the-fly pass is run,
// so we need to releaseMemory / finalize here
@@ -1711,11 +1690,8 @@ bool PassManagerImpl::run(Module &M) {
- SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
- for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
- E = IPV.end(); I != E; ++I) {
- Changed |= (*I)->doInitialization(M);
- }
+ for (ImmutablePass *ImPass : getImmutablePasses())
+ Changed |= ImPass->doInitialization(M);
for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
@@ -1723,10 +1699,8 @@ bool PassManagerImpl::run(Module &M) {
- for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
- E = IPV.end(); I != E; ++I) {
- Changed |= (*I)->doFinalization(M);
- }
+ for (ImmutablePass *ImPass : getImmutablePasses())
+ Changed |= ImPass->doFinalization(M);
return Changed;
@@ -1822,9 +1796,8 @@ void PMStack::push(PMDataManager *PM) {
// Dump content of the pass manager stack.
void PMStack::dump() const {
- for (std::vector<PMDataManager *>::const_iterator I = S.begin(),
- E = S.end(); I != E; ++I)
- dbgs() << (*I)->getAsPass()->getPassName() << ' ';
+ for (PMDataManager *Manager : S)
+ dbgs() << Manager->getAsPass()->getPassName() << ' ';
if (!S.empty())
dbgs() << '\n';
diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp
index 354592d..b4c5ca7 100644
--- a/lib/IR/MDBuilder.cpp
+++ b/lib/IR/MDBuilder.cpp
@@ -168,9 +168,16 @@ MDNode *MDBuilder::createTBAAScalarTypeNode(StringRef Name, MDNode *Parent,
/// \brief Return metadata for a TBAA tag node with the given
/// base type, access type and offset relative to the base type.
MDNode *MDBuilder::createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType,
- uint64_t Offset) {
+ uint64_t Offset, bool IsConstant) {
Type *Int64 = Type::getInt64Ty(Context);
- Metadata *Ops[3] = {BaseType, AccessType,
- createConstant(ConstantInt::get(Int64, Offset))};
- return MDNode::get(Context, Ops);
+ if (IsConstant) {
+ Metadata *Ops[4] = {BaseType, AccessType,
+ createConstant(ConstantInt::get(Int64, Offset)),
+ createConstant(ConstantInt::get(Int64, 1))};
+ return MDNode::get(Context, Ops);
+ } else {
+ Metadata *Ops[3] = {BaseType, AccessType,
+ createConstant(ConstantInt::get(Int64, Offset))};
+ return MDNode::get(Context, Ops);
+ }
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index 23a17a5..75b4046 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -256,9 +256,9 @@ ValueAsMetadata *ValueAsMetadata::get(Value *V) {
if (!Entry) {
assert((isa<Constant>(V) || isa<Argument>(V) || isa<Instruction>(V)) &&
"Expected constant or function-local value");
- assert(!V->NameAndIsUsedByMD.getInt() &&
+ assert(!V->IsUsedByMD &&
"Expected this to be the only metadata use");
- V->NameAndIsUsedByMD.setInt(true);
+ V->IsUsedByMD = true;
if (auto *C = dyn_cast<Constant>(V))
Entry = new ConstantAsMetadata(C);
@@ -302,15 +302,15 @@ void ValueAsMetadata::handleRAUW(Value *From, Value *To) {
auto &Store = Context.pImpl->ValuesAsMetadata;
auto I = Store.find(From);
if (I == Store.end()) {
- assert(!From->NameAndIsUsedByMD.getInt() &&
+ assert(!From->IsUsedByMD &&
"Expected From not to be used by metadata");
// Remove old entry from the map.
- assert(From->NameAndIsUsedByMD.getInt() &&
+ assert(From->IsUsedByMD &&
"Expected From to be used by metadata");
- From->NameAndIsUsedByMD.setInt(false);
+ From->IsUsedByMD = false;
ValueAsMetadata *MD = I->second;
assert(MD && "Expected valid metadata");
assert(MD->getValue() == From && "Expected valid mapping");
@@ -346,9 +346,9 @@ void ValueAsMetadata::handleRAUW(Value *From, Value *To) {
// Update MD in place (and update the map entry).
- assert(!To->NameAndIsUsedByMD.getInt() &&
+ assert(!To->IsUsedByMD &&
"Expected this to be the only metadata use");
- To->NameAndIsUsedByMD.setInt(true);
+ To->IsUsedByMD = true;
MD->V = To;
Entry = MD;
diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp
index 1c40516..b5c4e5d 100644
--- a/lib/IR/Type.cpp
+++ b/lib/IR/Type.cpp
@@ -307,12 +307,13 @@ IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
// Check for the built-in integer types
switch (NumBits) {
- case 1: return cast<IntegerType>(Type::getInt1Ty(C));
- case 8: return cast<IntegerType>(Type::getInt8Ty(C));
- case 16: return cast<IntegerType>(Type::getInt16Ty(C));
- case 32: return cast<IntegerType>(Type::getInt32Ty(C));
- case 64: return cast<IntegerType>(Type::getInt64Ty(C));
- default:
+ case 1: return cast<IntegerType>(Type::getInt1Ty(C));
+ case 8: return cast<IntegerType>(Type::getInt8Ty(C));
+ case 16: return cast<IntegerType>(Type::getInt16Ty(C));
+ case 32: return cast<IntegerType>(Type::getInt32Ty(C));
+ case 64: return cast<IntegerType>(Type::getInt64Ty(C));
+ case 128: return cast<IntegerType>(Type::getInt128Ty(C));
+ default:
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index fd0ed31..dcf0ad5 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -46,8 +46,9 @@ static inline Type *checkType(Type *Ty) {
Value::Value(Type *ty, unsigned scid)
- : VTy(checkType(ty)), UseList(nullptr), SubclassID(scid), HasValueHandle(0),
- SubclassOptionalData(0), SubclassData(0), NumOperands(0) {
+ : VTy(checkType(ty)), UseList(nullptr), SubclassID(scid),
+ HasValueHandle(0), SubclassOptionalData(0), SubclassData(0),
+ NumOperands(0), IsUsedByMD(false), HasName(false) {
// FIXME: Why isn't this in the subclass gunk??
// Note, we cannot call isa<CallInst> before the CallInst has been
// constructed.
@@ -157,11 +158,39 @@ static bool getSymTab(Value *V, ValueSymbolTable *&ST) {
return false;
+ValueName *Value::getValueName() const {
+ if (!HasName) return nullptr;
+ LLVMContext &Ctx = getContext();
+ auto I = Ctx.pImpl->ValueNames.find(this);
+ assert(I != Ctx.pImpl->ValueNames.end() &&
+ "No name entry found!");
+ return I->second;
+void Value::setValueName(ValueName *VN) {
+ LLVMContext &Ctx = getContext();
+ assert(HasName == Ctx.pImpl->ValueNames.count(this) &&
+ "HasName bit out of sync!");
+ if (!VN) {
+ if (HasName)
+ Ctx.pImpl->ValueNames.erase(this);
+ HasName = false;
+ return;
+ }
+ HasName = true;
+ Ctx.pImpl->ValueNames[this] = VN;
StringRef Value::getName() const {
// Make sure the empty string is still a C string. For historical reasons,
// some clients want to call .data() on the result and expect it to be null
// terminated.
- if (!getValueName())
+ if (!hasName())
return StringRef("", 0);
return getValueName()->getKey();
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
index 3cf13a0..716d66a 100644
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -250,8 +250,8 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **name,
return true;
-const void *LTOCodeGenerator::compileOptimized(size_t *length,
- std::string &errMsg) {
+LTOCodeGenerator::compileOptimized(std::string &errMsg) {
const char *name;
if (!compileOptimizedToFile(&name, errMsg))
return nullptr;
@@ -264,16 +264,11 @@ const void *LTOCodeGenerator::compileOptimized(size_t *length,
return nullptr;
- NativeObjectFile = std::move(*BufferOrErr);
// remove temp files
- // return buffer, unless error
- if (!NativeObjectFile)
- return nullptr;
- *length = NativeObjectFile->getBufferSize();
- return NativeObjectFile->getBufferStart();
+ return std::move(*BufferOrErr);
@@ -289,16 +284,14 @@ bool LTOCodeGenerator::compile_to_file(const char **name,
return compileOptimizedToFile(name, errMsg);
-const void* LTOCodeGenerator::compile(size_t *length,
- bool disableInline,
- bool disableGVNLoadPRE,
- bool disableVectorization,
- std::string &errMsg) {
+LTOCodeGenerator::compile(bool disableInline, bool disableGVNLoadPRE,
+ bool disableVectorization, std::string &errMsg) {
if (!optimize(disableInline, disableGVNLoadPRE,
disableVectorization, errMsg))
return nullptr;
- return compileOptimized(length, errMsg);
+ return compileOptimized(errMsg);
bool LTOCodeGenerator::determineTarget(std::string &errMsg) {
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 3cee0c4..13c5ca9 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -12,7 +12,6 @@ add_llvm_library(LLVMMC
- MCELF.cpp
@@ -36,6 +35,7 @@ add_llvm_library(LLVMMC
+ MCSymbolELF.cpp
diff --git a/lib/MC/ConstantPools.cpp b/lib/MC/ConstantPools.cpp
index a723aa8..f7649fb 100644
--- a/lib/MC/ConstantPools.cpp
+++ b/lib/MC/ConstantPools.cpp
@@ -40,7 +40,7 @@ const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context,
MCSymbol *CPEntryLabel = Context.createTempSymbol();
Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size));
- return MCSymbolRefExpr::Create(CPEntryLabel, Context);
+ return MCSymbolRefExpr::create(CPEntryLabel, Context);
bool ConstantPool::empty() { return Entries.empty(); }
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 18746d1..0765937 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -21,12 +21,11 @@
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCELF.h"
-#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Compression.h"
@@ -71,23 +70,20 @@ public:
class ELFObjectWriter : public MCObjectWriter {
static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
- static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant);
static uint64_t SymbolValue(const MCSymbol &Sym, const MCAsmLayout &Layout);
- static bool isInSymtab(const MCAsmLayout &Layout, const MCSymbol &Symbol,
+ static bool isInSymtab(const MCAsmLayout &Layout, const MCSymbolELF &Symbol,
bool Used, bool Renamed);
- static bool isLocal(const MCSymbol &Symbol, bool isUsedInReloc);
/// Helper struct for containing some precomputed information on symbols.
struct ELFSymbolData {
- const MCSymbol *Symbol;
- uint64_t StringIndex;
+ const MCSymbolELF *Symbol;
uint32_t SectionIndex;
StringRef Name;
// Support lexicographic sorting.
bool operator<(const ELFSymbolData &RHS) const {
- unsigned LHSType = MCELF::GetType(Symbol->getData());
- unsigned RHSType = MCELF::GetType(RHS.Symbol->getData());
+ unsigned LHSType = Symbol->getType();
+ unsigned RHSType = RHS.Symbol->getType();
return false;
@@ -101,9 +97,7 @@ class ELFObjectWriter : public MCObjectWriter {
/// The target specific ELF writer instance.
std::unique_ptr<MCELFObjectTargetWriter> TargetObjectWriter;
- SmallPtrSet<const MCSymbol *, 16> UsedInReloc;
- SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc;
- DenseMap<const MCSymbol *, const MCSymbol *> Renames;
+ DenseMap<const MCSymbolELF *, const MCSymbolELF *> Renames;
llvm::DenseMap<const MCSectionELF *, std::vector<ELFRelocationEntry>>
@@ -113,15 +107,9 @@ class ELFObjectWriter : public MCObjectWriter {
/// @{
StringTableBuilder StrTabBuilder;
- std::vector<uint64_t> FileSymbolData;
- std::vector<ELFSymbolData> LocalSymbolData;
- std::vector<ELFSymbolData> ExternalSymbolData;
- std::vector<ELFSymbolData> UndefinedSymbolData;
/// @}
- bool NeedsGOT;
// This holds the symbol table index of the last local symbol.
unsigned LastLocalSymbolIndex;
// This holds the .strtab section index.
@@ -145,23 +133,17 @@ class ELFObjectWriter : public MCObjectWriter {
return TargetObjectWriter->GetRelocType(Target, Fixup, IsPCRel);
+ void align(unsigned Alignment);
ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_pwrite_stream &OS,
bool IsLittleEndian)
- : MCObjectWriter(OS, IsLittleEndian), TargetObjectWriter(MOTW),
- NeedsGOT(false) {}
+ : MCObjectWriter(OS, IsLittleEndian), TargetObjectWriter(MOTW) {}
void reset() override {
- UsedInReloc.clear();
- WeakrefUsedInReloc.clear();
- FileSymbolData.clear();
- LocalSymbolData.clear();
- ExternalSymbolData.clear();
- UndefinedSymbolData.clear();
- NeedsGOT = false;
@@ -170,9 +152,9 @@ class ELFObjectWriter : public MCObjectWriter {
void WriteWord(uint64_t W) {
if (is64Bit())
- Write64(W);
+ write64(W);
- Write32(W);
+ write32(W);
template <typename T> void write(T Val) {
@@ -184,29 +166,23 @@ class ELFObjectWriter : public MCObjectWriter {
void writeHeader(const MCAssembler &Asm);
- void WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD,
- const MCAsmLayout &Layout);
+ void writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex,
+ ELFSymbolData &MSD, const MCAsmLayout &Layout);
// Start and end offset of each section
typedef std::map<const MCSectionELF *, std::pair<uint64_t, uint64_t>>
- void writeSymbolTable(MCContext &Ctx, const MCAsmLayout &Layout,
- SectionOffsetsTy &SectionOffsets);
bool shouldRelocateWithSymbol(const MCAssembler &Asm,
const MCSymbolRefExpr *RefA,
const MCSymbol *Sym, uint64_t C,
unsigned Type) const;
- void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+ void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override;
- uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
- const MCSymbol *S);
// Map from a signature symbol to the group section index
typedef DenseMap<const MCSymbol *, unsigned> RevGroupMapTy;
@@ -217,17 +193,18 @@ class ELFObjectWriter : public MCObjectWriter {
/// \param RevGroupMap - Maps a signature symbol to the group section.
void computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
const SectionIndexMapTy &SectionIndexMap,
- const RevGroupMapTy &RevGroupMap);
+ const RevGroupMapTy &RevGroupMap,
+ SectionOffsetsTy &SectionOffsets);
MCSectionELF *createRelocationSection(MCContext &Ctx,
const MCSectionELF &Sec);
const MCSectionELF *createStringTable(MCContext &Ctx);
- void ExecutePostLayoutBinding(MCAssembler &Asm,
+ void executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override;
- void writeSectionHeader(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ void writeSectionHeader(const MCAsmLayout &Layout,
const SectionIndexMapTy &SectionIndexMap,
const SectionOffsetsTy &SectionOffsets);
@@ -241,7 +218,7 @@ class ELFObjectWriter : public MCObjectWriter {
void writeRelocations(const MCAssembler &Asm, const MCSectionELF &Sec);
- bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
const MCSymbol &SymA,
const MCFragment &FB,
bool InSet,
@@ -249,13 +226,18 @@ class ELFObjectWriter : public MCObjectWriter {
bool isWeak(const MCSymbol &Sym) const override;
- void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+ void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
void writeSection(const SectionIndexMapTy &SectionIndexMap,
uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size,
const MCSectionELF &Section);
+void ELFObjectWriter::align(unsigned Alignment) {
+ uint64_t Padding = OffsetToAlignment(OS.tell(), Alignment);
+ WriteZeros(Padding);
unsigned ELFObjectWriter::addToSectionTable(const MCSectionELF *Sec) {
@@ -319,27 +301,6 @@ bool ELFObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
-bool ELFObjectWriter::RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) {
- switch (Variant) {
- default:
- return false;
- case MCSymbolRefExpr::VK_GOT:
- case MCSymbolRefExpr::VK_PLT:
- case MCSymbolRefExpr::VK_GOTPCREL:
- case MCSymbolRefExpr::VK_GOTOFF:
- case MCSymbolRefExpr::VK_TPOFF:
- case MCSymbolRefExpr::VK_TLSGD:
- case MCSymbolRefExpr::VK_GOTTPOFF:
- case MCSymbolRefExpr::VK_INDNTPOFF:
- case MCSymbolRefExpr::VK_NTPOFF:
- case MCSymbolRefExpr::VK_GOTNTPOFF:
- case MCSymbolRefExpr::VK_TLSLDM:
- case MCSymbolRefExpr::VK_DTPOFF:
- case MCSymbolRefExpr::VK_TLSLD:
- return true;
- }
@@ -353,54 +314,53 @@ void ELFObjectWriter::writeHeader(const MCAssembler &Asm) {
// emitWord method behaves differently for ELF32 and ELF64, writing
// 4 bytes in the former and 8 in the latter.
- WriteBytes(ELF::ElfMagic); // e_ident[EI_MAG0] to e_ident[EI_MAG3]
+ writeBytes(ELF::ElfMagic); // e_ident[EI_MAG0] to e_ident[EI_MAG3]
- Write8(is64Bit() ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS]
+ write8(is64Bit() ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS]
// e_ident[EI_DATA]
- Write8(isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB);
+ write8(isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB);
- Write8(ELF::EV_CURRENT); // e_ident[EI_VERSION]
+ write8(ELF::EV_CURRENT); // e_ident[EI_VERSION]
// e_ident[EI_OSABI]
- Write8(TargetObjectWriter->getOSABI());
- Write8(0); // e_ident[EI_ABIVERSION]
+ write8(TargetObjectWriter->getOSABI());
+ write8(0); // e_ident[EI_ABIVERSION]
- Write16(ELF::ET_REL); // e_type
+ write16(ELF::ET_REL); // e_type
- Write16(TargetObjectWriter->getEMachine()); // e_machine = target
+ write16(TargetObjectWriter->getEMachine()); // e_machine = target
- Write32(ELF::EV_CURRENT); // e_version
+ write32(ELF::EV_CURRENT); // e_version
WriteWord(0); // e_entry, no entry point in .o file
WriteWord(0); // e_phoff, no program header for .o
WriteWord(0); // e_shoff = sec hdr table off in bytes
// e_flags = whatever the target wants
- Write32(Asm.getELFHeaderEFlags());
+ write32(Asm.getELFHeaderEFlags());
// e_ehsize = ELF header size
- Write16(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
+ write16(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
- Write16(0); // e_phentsize = prog header entry size
- Write16(0); // e_phnum = # prog header entries = 0
+ write16(0); // e_phentsize = prog header entry size
+ write16(0); // e_phnum = # prog header entries = 0
// e_shentsize = Section header entry size
- Write16(is64Bit() ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr));
+ write16(is64Bit() ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr));
// e_shnum = # of section header ents
- Write16(0);
+ write16(0);
// e_shstrndx = Section # of '.shstrtab'
assert(StringTableIndex < ELF::SHN_LORESERVE);
- Write16(StringTableIndex);
+ write16(StringTableIndex);
uint64_t ELFObjectWriter::SymbolValue(const MCSymbol &Sym,
const MCAsmLayout &Layout) {
- MCSymbolData &Data = Sym.getData();
- if (Data.isCommon() && Data.isExternal())
- return Data.getCommonAlignment();
+ if (Sym.isCommon() && Sym.isExternal())
+ return Sym.getCommonAlignment();
uint64_t Res;
if (!Layout.getSymbolOffset(Sym, Res))
@@ -412,22 +372,20 @@ uint64_t ELFObjectWriter::SymbolValue(const MCSymbol &Sym,
return Res;
-void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
// The presence of symbol versions causes undefined symbols and
// versions declared with @@@ to be renamed.
- for (const MCSymbol &Alias : Asm.symbols()) {
- MCSymbolData &OriginalData = Alias.getData();
+ for (const MCSymbol &A : Asm.symbols()) {
+ const auto &Alias = cast<MCSymbolELF>(A);
// Not an alias.
if (!Alias.isVariable())
auto *Ref = dyn_cast<MCSymbolRefExpr>(Alias.getVariableValue());
if (!Ref)
- const MCSymbol &Symbol = Ref->getSymbol();
- MCSymbolData &SD = Asm.getSymbolData(Symbol);
+ const auto &Symbol = cast<MCSymbolELF>(Ref->getSymbol());
StringRef AliasName = Alias.getName();
size_t Pos = AliasName.find('@');
@@ -436,8 +394,8 @@ void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
// Aliases defined with .symvar copy the binding from the symbol they alias.
// This is the first place we are able to copy this information.
- OriginalData.setExternal(SD.isExternal());
- MCELF::SetBinding(OriginalData, MCELF::GetBinding(SD));
+ Alias.setExternal(Symbol.isExternal());
+ Alias.setBinding(Symbol.getBinding());
StringRef Rest = AliasName.substr(Pos);
if (!Symbol.isUndefined() && !Rest.startswith("@@@"))
@@ -487,40 +445,39 @@ static uint8_t mergeTypeForSet(uint8_t origType, uint8_t newType) {
return Type;
-void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD,
+void ELFObjectWriter::writeSymbol(SymbolTableWriter &Writer,
+ uint32_t StringIndex, ELFSymbolData &MSD,
const MCAsmLayout &Layout) {
- MCSymbolData &OrigData = MSD.Symbol->getData();
- assert((!OrigData.getFragment() ||
- (OrigData.getFragment()->getParent() == &MSD.Symbol->getSection())) &&
+ const auto &Symbol = cast<MCSymbolELF>(*MSD.Symbol);
+ assert((!Symbol.getFragment() ||
+ (Symbol.getFragment()->getParent() == &Symbol.getSection())) &&
"The symbol's section doesn't match the fragment's symbol");
- const MCSymbol *Base = Layout.getBaseSymbol(*MSD.Symbol);
+ const MCSymbolELF *Base =
+ cast_or_null<MCSymbolELF>(Layout.getBaseSymbol(Symbol));
// This has to be in sync with when computeSymbolTable uses SHN_ABS or
- bool IsReserved = !Base || OrigData.isCommon();
+ bool IsReserved = !Base || Symbol.isCommon();
// Binding and Type share the same byte as upper and lower nibbles
- uint8_t Binding = MCELF::GetBinding(OrigData);
- uint8_t Type = MCELF::GetType(OrigData);
- MCSymbolData *BaseSD = nullptr;
+ uint8_t Binding = Symbol.getBinding();
+ uint8_t Type = Symbol.getType();
if (Base) {
- BaseSD = &Layout.getAssembler().getSymbolData(*Base);
- Type = mergeTypeForSet(Type, MCELF::GetType(*BaseSD));
+ Type = mergeTypeForSet(Type, Base->getType());
- uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift);
+ uint8_t Info = (Binding << 4) | Type;
// Other and Visibility share the same byte with Visibility using the lower
// 2 bits
- uint8_t Visibility = MCELF::GetVisibility(OrigData);
- uint8_t Other = MCELF::getOther(OrigData) << (ELF_STO_Shift - ELF_STV_Shift);
- Other |= Visibility;
+ uint8_t Visibility = Symbol.getVisibility();
+ uint8_t Other = Symbol.getOther() | Visibility;
uint64_t Value = SymbolValue(*MSD.Symbol, Layout);
uint64_t Size = 0;
- const MCExpr *ESize = OrigData.getSize();
+ const MCExpr *ESize = MSD.Symbol->getSize();
if (!ESize && Base)
- ESize = BaseSD->getSize();
+ ESize = Base->getSize();
if (ESize) {
int64_t Res;
@@ -530,78 +487,8 @@ void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD,
// Write out the symbol table entry
- Writer.writeSymbol(MSD.StringIndex, Info, Value, Size, Other,
- MSD.SectionIndex, IsReserved);
-void ELFObjectWriter::writeSymbolTable(MCContext &Ctx,
- const MCAsmLayout &Layout,
- SectionOffsetsTy &SectionOffsets) {
- const MCSectionELF *SymtabSection = SectionTable[SymbolTableIndex - 1];
- // The string table must be emitted first because we need the index
- // into the string table for all the symbol names.
- SymbolTableWriter Writer(*this, is64Bit());
- uint64_t Padding =
- OffsetToAlignment(OS.tell(), SymtabSection->getAlignment());
- WriteZeros(Padding);
- uint64_t SecStart = OS.tell();
- // The first entry is the undefined symbol entry.
- Writer.writeSymbol(0, 0, 0, 0, 0, 0, false);
- for (unsigned i = 0, e = FileSymbolData.size(); i != e; ++i) {
- Writer.writeSymbol(FileSymbolData[i], ELF::STT_FILE | ELF::STB_LOCAL, 0, 0,
- }
- // Write the symbol table entries.
- LastLocalSymbolIndex = FileSymbolData.size() + LocalSymbolData.size() + 1;
- for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) {
- ELFSymbolData &MSD = LocalSymbolData[i];
- WriteSymbol(Writer, MSD, Layout);
- }
- for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) {
- ELFSymbolData &MSD = ExternalSymbolData[i];
- MCSymbolData &Data = MSD.Symbol->getData();
- assert(((Data.getFlags() & ELF_STB_Global) ||
- (Data.getFlags() & ELF_STB_Weak)) &&
- "External symbol requires STB_GLOBAL or STB_WEAK flag");
- WriteSymbol(Writer, MSD, Layout);
- if (MCELF::GetBinding(Data) == ELF::STB_LOCAL)
- LastLocalSymbolIndex++;
- }
- for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) {
- ELFSymbolData &MSD = UndefinedSymbolData[i];
- MCSymbolData &Data = MSD.Symbol->getData();
- WriteSymbol(Writer, MSD, Layout);
- if (MCELF::GetBinding(Data) == ELF::STB_LOCAL)
- LastLocalSymbolIndex++;
- }
- uint64_t SecEnd = OS.tell();
- SectionOffsets[SymtabSection] = std::make_pair(SecStart, SecEnd);
- ArrayRef<uint32_t> ShndxIndexes = Writer.getShndxIndexes();
- if (ShndxIndexes.empty()) {
- assert(SymtabShndxSectionIndex == 0);
- return;
- }
- assert(SymtabShndxSectionIndex != 0);
- SecStart = OS.tell();
- const MCSectionELF *SymtabShndxSection =
- SectionTable[SymtabShndxSectionIndex - 1];
- for (uint32_t Index : ShndxIndexes)
- write(Index);
- SecEnd = OS.tell();
- SectionOffsets[SymtabShndxSection] = std::make_pair(SecStart, SecEnd);
+ Writer.writeSymbol(StringIndex, Info, Value, Size, Other, MSD.SectionIndex,
+ IsReserved);
// It is always valid to create a relocation with a symbol. It is preferable
@@ -609,10 +496,9 @@ void ELFObjectWriter::writeSymbolTable(MCContext &Ctx,
// allows us to omit some local symbols from the symbol table.
bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
const MCSymbolRefExpr *RefA,
- const MCSymbol *Sym, uint64_t C,
+ const MCSymbol *S, uint64_t C,
unsigned Type) const {
- MCSymbolData *SD = Sym ? &Sym->getData() : nullptr;
+ const auto *Sym = cast_or_null<MCSymbolELF>(S);
// A PCRel relocation to an absolute value has no symbol (or section). We
// represent that with a relocation to a null section.
if (!RefA)
@@ -651,7 +537,7 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
if (Sym->isUndefined())
return true;
- unsigned Binding = MCELF::GetBinding(*SD);
+ unsigned Binding = Sym->getBinding();
switch(Binding) {
llvm_unreachable("Invalid Binding");
@@ -701,38 +587,19 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
if (Asm.isThumbFunc(Sym))
return true;
- if (TargetObjectWriter->needsRelocateWithSymbol(*SD, Type))
+ if (TargetObjectWriter->needsRelocateWithSymbol(*Sym, Type))
return true;
return false;
-static const MCSymbol *getWeakRef(const MCSymbolRefExpr &Ref) {
- const MCSymbol &Sym = Ref.getSymbol();
- if (Ref.getKind() == MCSymbolRefExpr::VK_WEAKREF)
- return &Sym;
- if (!Sym.isVariable())
- return nullptr;
- const MCExpr *Expr = Sym.getVariableValue();
- const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr);
- if (!Inner)
- return nullptr;
- if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF)
- return &Inner->getSymbol();
- return nullptr;
// True if the assembler knows nothing about the final value of the symbol.
// This doesn't cover the comdat issues, since in those cases the assembler
// can at least know that all symbols in the section will move together.
-static bool isWeak(const MCSymbolData &D) {
- if (MCELF::GetType(D) == ELF::STT_GNU_IFUNC)
+static bool isWeak(const MCSymbolELF &Sym) {
+ if (Sym.getType() == ELF::STT_GNU_IFUNC)
return true;
- switch (MCELF::GetBinding(D)) {
+ switch (Sym.getBinding()) {
llvm_unreachable("Unknown binding");
@@ -745,7 +612,7 @@ static bool isWeak(const MCSymbolData &D) {
-void ELFObjectWriter::RecordRelocation(MCAssembler &Asm,
+void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
@@ -770,7 +637,7 @@ void ELFObjectWriter::RecordRelocation(MCAssembler &Asm,
"No relocation available to represent this relative expression");
- const MCSymbol &SymB = RefB->getSymbol();
+ const auto &SymB = cast<MCSymbolELF>(RefB->getSymbol());
if (SymB.isUndefined())
@@ -784,7 +651,7 @@ void ELFObjectWriter::RecordRelocation(MCAssembler &Asm,
Fixup.getLoc(), "Cannot represent a difference across sections");
- if (::isWeak(SymB.getData()))
+ if (::isWeak(SymB))
Fixup.getLoc(), "Cannot represent a subtraction with a weak symbol");
@@ -796,7 +663,18 @@ void ELFObjectWriter::RecordRelocation(MCAssembler &Asm,
// We either rejected the fixup or folded B into C at this point.
const MCSymbolRefExpr *RefA = Target.getSymA();
- const MCSymbol *SymA = RefA ? &RefA->getSymbol() : nullptr;
+ const auto *SymA = RefA ? cast<MCSymbolELF>(&RefA->getSymbol()) : nullptr;
+ bool ViaWeakRef = false;
+ if (SymA && SymA->isVariable()) {
+ const MCExpr *Expr = SymA->getVariableValue();
+ if (const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr)) {
+ if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) {
+ SymA = cast<MCSymbolELF>(&Inner->getSymbol());
+ ViaWeakRef = true;
+ }
+ }
+ }
unsigned Type = GetRelocType(Target, Fixup, IsPCRel);
bool RelocateWithSymbol = shouldRelocateWithSymbol(Asm, RefA, SymA, C, Type);
@@ -811,50 +689,36 @@ void ELFObjectWriter::RecordRelocation(MCAssembler &Asm,
FixedValue = C;
- // FIXME: What is this!?!?
- MCSymbolRefExpr::VariantKind Modifier =
- RefA ? RefA->getKind() : MCSymbolRefExpr::VK_None;
- if (RelocNeedsGOT(Modifier))
- NeedsGOT = true;
if (!RelocateWithSymbol) {
const MCSection *SecA =
(SymA && !SymA->isUndefined()) ? &SymA->getSection() : nullptr;
auto *ELFSec = cast_or_null<MCSectionELF>(SecA);
- MCSymbol *SectionSymbol =
- ELFSec ? Asm.getContext().getOrCreateSectionSymbol(*ELFSec)
- : nullptr;
+ const auto *SectionSymbol =
+ ELFSec ? cast<MCSymbolELF>(ELFSec->getBeginSymbol()) : nullptr;
+ if (SectionSymbol)
+ SectionSymbol->setUsedInReloc();
ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend);
if (SymA) {
- if (const MCSymbol *R = Renames.lookup(SymA))
+ if (const MCSymbolELF *R = Renames.lookup(SymA))
SymA = R;
- if (const MCSymbol *WeakRef = getWeakRef(*RefA))
- WeakrefUsedInReloc.insert(WeakRef);
+ if (ViaWeakRef)
+ SymA->setIsWeakrefUsedInReloc();
- UsedInReloc.insert(SymA);
+ SymA->setUsedInReloc();
ELFRelocationEntry Rec(FixupOffset, SymA, Type, Addend);
-ELFObjectWriter::getSymbolIndexInSymbolTable(const MCAssembler &Asm,
- const MCSymbol *S) {
- assert(S->hasData());
- return S->getIndex();
bool ELFObjectWriter::isInSymtab(const MCAsmLayout &Layout,
- const MCSymbol &Symbol, bool Used,
+ const MCSymbolELF &Symbol, bool Used,
bool Renamed) {
- const MCSymbolData &Data = Symbol.getData();
if (Symbol.isVariable()) {
const MCExpr *Expr = Symbol.getVariableValue();
if (const MCSymbolRefExpr *Ref = dyn_cast<MCSymbolRefExpr>(Expr)) {
@@ -869,34 +733,19 @@ bool ELFObjectWriter::isInSymtab(const MCAsmLayout &Layout,
if (Renamed)
return false;
- if (Symbol.getName() == "_GLOBAL_OFFSET_TABLE_")
- return true;
- if (Symbol.isVariable()) {
- const MCSymbol *Base = Layout.getBaseSymbol(Symbol);
- if (Base && Base->isUndefined())
- return false;
+ if (Symbol.isVariable() && Symbol.isUndefined()) {
+ // FIXME: this is here just to diagnose the case of a var = commmon_sym.
+ Layout.getBaseSymbol(Symbol);
+ return false;
- bool IsGlobal = MCELF::GetBinding(Data) == ELF::STB_GLOBAL;
- if (!Symbol.isVariable() && Symbol.isUndefined() && !IsGlobal)
+ if (Symbol.isUndefined() && !Symbol.isBindingSet())
return false;
if (Symbol.isTemporary())
return false;
- return true;
-bool ELFObjectWriter::isLocal(const MCSymbol &Symbol, bool isUsedInReloc) {
- const MCSymbolData &Data = Symbol.getData();
- if (Data.isExternal())
- return false;
- if (Symbol.isDefined())
- return true;
- if (isUsedInReloc)
+ if (Symbol.getType() == ELF::STT_SECTION)
return false;
return true;
@@ -904,9 +753,11 @@ bool ELFObjectWriter::isLocal(const MCSymbol &Symbol, bool isUsedInReloc) {
void ELFObjectWriter::computeSymbolTable(
MCAssembler &Asm, const MCAsmLayout &Layout,
- const SectionIndexMapTy &SectionIndexMap,
- const RevGroupMapTy &RevGroupMap) {
+ const SectionIndexMapTy &SectionIndexMap, const RevGroupMapTy &RevGroupMap,
+ SectionOffsetsTy &SectionOffsets) {
MCContext &Ctx = Asm.getContext();
+ SymbolTableWriter Writer(*this, is64Bit());
// Symbol table
unsigned EntrySize = is64Bit() ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
MCSectionELF *SymtabSection =
@@ -914,49 +765,37 @@ void ELFObjectWriter::computeSymbolTable(
SymtabSection->setAlignment(is64Bit() ? 8 : 4);
SymbolTableIndex = addToSectionTable(SymtabSection);
- // FIXME: Is this the correct place to do this?
- // FIXME: Why is an undefined reference to _GLOBAL_OFFSET_TABLE_ needed?
- if (NeedsGOT) {
- StringRef Name = "_GLOBAL_OFFSET_TABLE_";
- MCSymbol *Sym = Asm.getContext().getOrCreateSymbol(Name);
- MCSymbolData &Data = Asm.getOrCreateSymbolData(*Sym);
- Data.setExternal(true);
- MCELF::SetBinding(Data, ELF::STB_GLOBAL);
- }
+ align(SymtabSection->getAlignment());
+ uint64_t SecStart = OS.tell();
+ // The first entry is the undefined symbol entry.
+ Writer.writeSymbol(0, 0, 0, 0, 0, 0, false);
+ std::vector<ELFSymbolData> LocalSymbolData;
+ std::vector<ELFSymbolData> ExternalSymbolData;
// Add the data for the symbols.
bool HasLargeSectionIndex = false;
- for (const MCSymbol &Symbol : Asm.symbols()) {
- MCSymbolData &SD = Symbol.getData();
- bool Used = UsedInReloc.count(&Symbol);
- bool WeakrefUsed = WeakrefUsedInReloc.count(&Symbol);
- bool isSignature = RevGroupMap.count(&Symbol);
+ for (const MCSymbol &S : Asm.symbols()) {
+ const auto &Symbol = cast<MCSymbolELF>(S);
+ bool Used = Symbol.isUsedInReloc();
+ bool WeakrefUsed = Symbol.isWeakrefUsedInReloc();
+ bool isSignature = Symbol.isSignature();
if (!isInSymtab(Layout, Symbol, Used || WeakrefUsed || isSignature,
ELFSymbolData MSD;
- MSD.Symbol = &Symbol;
- const MCSymbol *BaseSymbol = Layout.getBaseSymbol(Symbol);
- // Undefined symbols are global, but this is the first place we
- // are able to set it.
- bool Local = isLocal(Symbol, Used);
- if (!Local && MCELF::GetBinding(SD) == ELF::STB_LOCAL) {
- assert(BaseSymbol);
- MCSymbolData &BaseData = Asm.getSymbolData(*BaseSymbol);
- MCELF::SetBinding(BaseData, ELF::STB_GLOBAL);
- }
+ MSD.Symbol = cast<MCSymbolELF>(&Symbol);
- if (!BaseSymbol) {
+ bool Local = Symbol.getBinding() == ELF::STB_LOCAL;
+ if (Symbol.isAbsolute()) {
MSD.SectionIndex = ELF::SHN_ABS;
- } else if (SD.isCommon()) {
+ } else if (Symbol.isCommon()) {
MSD.SectionIndex = ELF::SHN_COMMON;
- } else if (BaseSymbol->isUndefined()) {
+ } else if (Symbol.isUndefined()) {
if (isSignature && !Used) {
MSD.SectionIndex = RevGroupMap.lookup(&Symbol);
if (MSD.SectionIndex >= ELF::SHN_LORESERVE)
@@ -964,11 +803,9 @@ void ELFObjectWriter::computeSymbolTable(
} else {
MSD.SectionIndex = ELF::SHN_UNDEF;
- if (!Used && WeakrefUsed)
- MCELF::SetBinding(SD, ELF::STB_WEAK);
} else {
const MCSectionELF &Section =
- static_cast<const MCSectionELF&>(BaseSymbol->getSection());
+ static_cast<const MCSectionELF &>(Symbol.getSection());
MSD.SectionIndex = SectionIndexMap.lookup(&Section);
assert(MSD.SectionIndex && "Invalid section index!");
if (MSD.SectionIndex >= ELF::SHN_LORESERVE)
@@ -1015,12 +852,10 @@ void ELFObjectWriter::computeSymbolTable(
// Sections have their own string table
- if (MCELF::GetType(SD) != ELF::STT_SECTION)
+ if (Symbol.getType() != ELF::STT_SECTION)
MSD.Name = StrTabBuilder.add(Name);
- if (MSD.SectionIndex == ELF::SHN_UNDEF)
- UndefinedSymbolData.push_back(MSD);
- else if (Local)
+ if (Local)
@@ -1033,38 +868,60 @@ void ELFObjectWriter::computeSymbolTable(
- for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i)
- StrTabBuilder.add(*i);
+ ArrayRef<std::string> FileNames = Asm.getFileNames();
+ for (const std::string &Name : FileNames)
+ StrTabBuilder.add(Name);
- for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i)
- FileSymbolData.push_back(StrTabBuilder.getOffset(*i));
- for (ELFSymbolData &MSD : LocalSymbolData)
- MSD.StringIndex = MCELF::GetType(MSD.Symbol->getData()) == ELF::STT_SECTION
- ? 0
- : StrTabBuilder.getOffset(MSD.Name);
- for (ELFSymbolData &MSD : ExternalSymbolData)
- MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name);
- for (ELFSymbolData& MSD : UndefinedSymbolData)
- MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name);
+ for (const std::string &Name : FileNames)
+ Writer.writeSymbol(StrTabBuilder.getOffset(Name),
+ ELF::SHN_ABS, true);
// Symbols are required to be in lexicographic order.
array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end());
array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
- array_pod_sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
// Set the symbol indices. Local symbols must come before all other
// symbols with non-local bindings.
- unsigned Index = FileSymbolData.size() + 1;
- for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
- LocalSymbolData[i].Symbol->setIndex(Index++);
- for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
- ExternalSymbolData[i].Symbol->setIndex(Index++);
- for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
- UndefinedSymbolData[i].Symbol->setIndex(Index++);
+ unsigned Index = FileNames.size() + 1;
+ for (ELFSymbolData &MSD : LocalSymbolData) {
+ unsigned StringIndex = MSD.Symbol->getType() == ELF::STT_SECTION
+ ? 0
+ : StrTabBuilder.getOffset(MSD.Name);
+ MSD.Symbol->setIndex(Index++);
+ writeSymbol(Writer, StringIndex, MSD, Layout);
+ }
+ // Write the symbol table entries.
+ LastLocalSymbolIndex = Index;
+ for (ELFSymbolData &MSD : ExternalSymbolData) {
+ unsigned StringIndex = StrTabBuilder.getOffset(MSD.Name);
+ MSD.Symbol->setIndex(Index++);
+ writeSymbol(Writer, StringIndex, MSD, Layout);
+ assert(MSD.Symbol->getBinding() != ELF::STB_LOCAL);
+ }
+ uint64_t SecEnd = OS.tell();
+ SectionOffsets[SymtabSection] = std::make_pair(SecStart, SecEnd);
+ ArrayRef<uint32_t> ShndxIndexes = Writer.getShndxIndexes();
+ if (ShndxIndexes.empty()) {
+ assert(SymtabShndxSectionIndex == 0);
+ return;
+ }
+ assert(SymtabShndxSectionIndex != 0);
+ SecStart = OS.tell();
+ const MCSectionELF *SymtabShndxSection =
+ SectionTable[SymtabShndxSectionIndex - 1];
+ for (uint32_t Index : ShndxIndexes)
+ write(Index);
+ SecEnd = OS.tell();
+ SectionOffsets[SymtabShndxSection] = std::make_pair(SecStart, SecEnd);
MCSectionELF *
@@ -1182,14 +1039,14 @@ void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
uint32_t Link, uint32_t Info,
uint64_t Alignment,
uint64_t EntrySize) {
- Write32(Name); // sh_name: index into string table
- Write32(Type); // sh_type
+ write32(Name); // sh_name: index into string table
+ write32(Type); // sh_type
WriteWord(Flags); // sh_flags
WriteWord(Address); // sh_addr
WriteWord(Offset); // sh_offset
WriteWord(Size); // sh_size
- Write32(Link); // sh_link
- Write32(Info); // sh_info
+ write32(Link); // sh_link
+ write32(Info); // sh_info
WriteWord(Alignment); // sh_addralign
WriteWord(EntrySize); // sh_entsize
@@ -1204,8 +1061,7 @@ void ELFObjectWriter::writeRelocations(const MCAssembler &Asm,
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
const ELFRelocationEntry &Entry = Relocs[e - i - 1];
- unsigned Index =
- Entry.Symbol ? getSymbolIndexInSymbolTable(Asm, Entry.Symbol) : 0;
+ unsigned Index = Entry.Symbol ? Entry.Symbol->getIndex() : 0;
if (is64Bit()) {
@@ -1292,8 +1148,7 @@ void ELFObjectWriter::writeSection(const SectionIndexMapTy &SectionIndexMap,
void ELFObjectWriter::writeSectionHeader(
- const MCAssembler &Asm, const MCAsmLayout &Layout,
- const SectionIndexMapTy &SectionIndexMap,
+ const MCAsmLayout &Layout, const SectionIndexMapTy &SectionIndexMap,
const SectionOffsetsTy &SectionOffsets) {
const unsigned NumSections = SectionTable.size();
@@ -1308,7 +1163,7 @@ void ELFObjectWriter::writeSectionHeader(
if (Type != ELF::SHT_GROUP)
GroupSymbolIndex = 0;
- GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm, Section->getGroup());
+ GroupSymbolIndex = Section->getGroup()->getIndex();
const std::pair<uint64_t, uint64_t> &Offsets =
@@ -1323,7 +1178,7 @@ void ELFObjectWriter::writeSectionHeader(
-void ELFObjectWriter::WriteObject(MCAssembler &Asm,
+void ELFObjectWriter::writeObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
MCContext &Ctx = Asm.getContext();
MCSectionELF *StrtabSection =
@@ -1345,13 +1200,12 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
for (MCSection &Sec : Asm) {
MCSectionELF &Section = static_cast<MCSectionELF &>(Sec);
- uint64_t Padding = OffsetToAlignment(OS.tell(), Section.getAlignment());
- WriteZeros(Padding);
+ align(Section.getAlignment());
// Remember the offset into the file for this section.
uint64_t SecStart = OS.tell();
- const MCSymbol *SignatureSymbol = Section.getGroup();
+ const MCSymbolELF *SignatureSymbol = Section.getGroup();
writeSectionData(Asm, Section, Layout);
uint64_t SecEnd = OS.tell();
@@ -1360,7 +1214,7 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
MCSectionELF *RelSection = createRelocationSection(Ctx, Section);
if (SignatureSymbol) {
- Asm.getOrCreateSymbolData(*SignatureSymbol);
+ Asm.registerSymbol(*SignatureSymbol);
unsigned &GroupIdx = RevGroupMap[SignatureSymbol];
if (!GroupIdx) {
MCSectionELF *Group = Ctx.createELFGroupSection(SignatureSymbol);
@@ -1368,9 +1222,11 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
- GroupMembers[SignatureSymbol].push_back(&Section);
+ std::vector<const MCSectionELF *> &Members =
+ GroupMembers[SignatureSymbol];
+ Members.push_back(&Section);
if (RelSection)
- GroupMembers[SignatureSymbol].push_back(RelSection);
+ Members.push_back(RelSection);
SectionIndexMap[&Section] = addToSectionTable(&Section);
@@ -1381,8 +1237,7 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
for (MCSectionELF *Group : Groups) {
- uint64_t Padding = OffsetToAlignment(OS.tell(), Group->getAlignment());
- WriteZeros(Padding);
+ align(Group->getAlignment());
// Remember the offset into the file for this section.
uint64_t SecStart = OS.tell();
@@ -1400,11 +1255,10 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
// Compute symbol table information.
- computeSymbolTable(Asm, Layout, SectionIndexMap, RevGroupMap);
+ computeSymbolTable(Asm, Layout, SectionIndexMap, RevGroupMap, SectionOffsets);
for (MCSectionELF *RelSection : Relocations) {
- uint64_t Padding = OffsetToAlignment(OS.tell(), RelSection->getAlignment());
- WriteZeros(Padding);
+ align(RelSection->getAlignment());
// Remember the offset into the file for this section.
uint64_t SecStart = OS.tell();
@@ -1415,8 +1269,6 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
SectionOffsets[RelSection] = std::make_pair(SecStart, SecEnd);
- writeSymbolTable(Ctx, Layout, SectionOffsets);
uint64_t SecStart = OS.tell();
const MCSectionELF *Sec = createStringTable(Ctx);
@@ -1425,13 +1277,12 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
uint64_t NaturalAlignment = is64Bit() ? 8 : 4;
- uint64_t Padding = OffsetToAlignment(OS.tell(), NaturalAlignment);
- WriteZeros(Padding);
+ align(NaturalAlignment);
const unsigned SectionHeaderOffset = OS.tell();
// ... then the section header table ...
- writeSectionHeader(Asm, Layout, SectionIndexMap, SectionOffsets);
+ writeSectionHeader(Layout, SectionIndexMap, SectionOffsets);
uint16_t NumSections = (SectionTable.size() + 1 >= ELF::SHN_LORESERVE)
? (uint16_t)ELF::SHN_UNDEF
@@ -1459,21 +1310,22 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
-bool ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
- const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
+bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
+ const MCAssembler &Asm, const MCSymbol &SA, const MCFragment &FB,
bool InSet, bool IsPCRel) const {
+ const auto &SymA = cast<MCSymbolELF>(SA);
if (IsPCRel) {
- if (::isWeak(SymA.getData()))
+ if (::isWeak(SymA))
return false;
- return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB,
+ return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB,
InSet, IsPCRel);
-bool ELFObjectWriter::isWeak(const MCSymbol &Sym) const {
- const MCSymbolData &SD = Sym.getData();
- if (::isWeak(SD))
+bool ELFObjectWriter::isWeak(const MCSymbol &S) const {
+ const auto &Sym = cast<MCSymbolELF>(S);
+ if (::isWeak(Sym))
return true;
// It is invalid to replace a reference to a global in a comdat
@@ -1482,7 +1334,7 @@ bool ELFObjectWriter::isWeak(const MCSymbol &Sym) const {
// We could try to return false for more cases, like the reference
// being in the same comdat or Sym being an alias to another global,
// but it is not clear if it is worth the effort.
- if (MCELF::GetBinding(SD) != ELF::STB_GLOBAL)
+ if (Sym.getBinding() != ELF::STB_GLOBAL)
return false;
if (!Sym.isInSection())
diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp
index c42757b..36c65b7 100644
--- a/lib/MC/MCAsmBackend.cpp
+++ b/lib/MC/MCAsmBackend.cpp
@@ -16,27 +16,33 @@ MCAsmBackend::MCAsmBackend() : HasDataInCodeSupport(false) {}
MCAsmBackend::~MCAsmBackend() {}
-const MCFixupKindInfo &
-MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
static const MCFixupKindInfo Builtins[] = {
- { "FK_Data_1", 0, 8, 0 },
- { "FK_Data_2", 0, 16, 0 },
- { "FK_Data_4", 0, 32, 0 },
- { "FK_Data_8", 0, 64, 0 },
- { "FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_GPRel_1", 0, 8, 0 },
- { "FK_GPRel_2", 0, 16, 0 },
- { "FK_GPRel_4", 0, 32, 0 },
- { "FK_GPRel_8", 0, 64, 0 },
- { "FK_SecRel_1", 0, 8, 0 },
- { "FK_SecRel_2", 0, 16, 0 },
- { "FK_SecRel_4", 0, 32, 0 },
- { "FK_SecRel_8", 0, 64, 0 }
- };
+ {"FK_Data_1", 0, 8, 0},
+ {"FK_Data_2", 0, 16, 0},
+ {"FK_Data_4", 0, 32, 0},
+ {"FK_Data_8", 0, 64, 0},
+ {"FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel},
+ {"FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel},
+ {"FK_GPRel_1", 0, 8, 0},
+ {"FK_GPRel_2", 0, 16, 0},
+ {"FK_GPRel_4", 0, 32, 0},
+ {"FK_GPRel_8", 0, 64, 0},
+ {"FK_SecRel_1", 0, 8, 0},
+ {"FK_SecRel_2", 0, 16, 0},
+ {"FK_SecRel_4", 0, 32, 0},
+ {"FK_SecRel_8", 0, 64, 0}};
assert((size_t)Kind <= array_lengthof(Builtins) && "Unknown fixup kind");
return Builtins[Kind];
+bool MCAsmBackend::fixupNeedsRelaxationAdvanced(
+ const MCFixup &Fixup, bool Resolved, uint64_t Value,
+ const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const {
+ if (!Resolved)
+ return true;
+ return fixupNeedsRelaxation(Fixup, Value, DF, Layout);
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index b61f5b1..100dc7c 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -50,6 +50,7 @@ MCAsmInfo::MCAsmInfo() {
Code64Directive = ".code64";
AssemblerDialect = 0;
AllowAtInName = false;
+ SupportsQuotedNames = true;
UseDataRegionDirectives = false;
ZeroDirective = "\\t";
AsciiDirective = "\t.ascii\t";
@@ -128,12 +129,31 @@ MCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym,
unsigned Encoding,
MCStreamer &Streamer) const {
if (!(Encoding & dwarf::DW_EH_PE_pcrel))
- return MCSymbolRefExpr::Create(Sym, Streamer.getContext());
+ return MCSymbolRefExpr::create(Sym, Streamer.getContext());
MCContext &Context = Streamer.getContext();
- const MCExpr *Res = MCSymbolRefExpr::Create(Sym, Context);
+ const MCExpr *Res = MCSymbolRefExpr::create(Sym, Context);
MCSymbol *PCSym = Context.createTempSymbol();
- const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
- return MCBinaryExpr::CreateSub(Res, PC, Context);
+ const MCExpr *PC = MCSymbolRefExpr::create(PCSym, Context);
+ return MCBinaryExpr::createSub(Res, PC, Context);
+static bool isAcceptableChar(char C) {
+ return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
+ (C >= '0' && C <= '9') || C == '_' || C == '$' || C == '.' || C == '@';
+bool MCAsmInfo::isValidUnquotedName(StringRef Name) const {
+ if (Name.empty())
+ return false;
+ // If any of the characters in the string is an unacceptable character, force
+ // quotes.
+ for (char C : Name) {
+ if (!isAcceptableChar(C))
+ return false;
+ }
+ return true;
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index cabe63b..0f405ad 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -24,7 +24,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
@@ -136,9 +136,10 @@ public:
void EmitCOFFSymbolStorageClass(int StorageClass) override;
void EmitCOFFSymbolType(int Type) override;
void EndCOFFSymbolDef() override;
+ void EmitCOFFSafeSEH(MCSymbol const *Symbol) override;
void EmitCOFFSectionIndex(MCSymbol const *Symbol) override;
void EmitCOFFSecRel32(MCSymbol const *Symbol) override;
- void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) override;
+ void emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) override;
void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
@@ -307,7 +308,9 @@ void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- OS << *Symbol << MAI->getLabelSuffix();
+ Symbol->print(OS, MAI);
+ OS << MAI->getLabelSuffix();
@@ -327,7 +330,7 @@ void MCAsmStreamer::EmitLOHDirective(MCLOHType Kind, const MCLOHArgs &Args) {
if (!IsFirst)
OS << ", ";
IsFirst = false;
- OS << **It;
+ (*It)->print(OS, MAI);
@@ -383,20 +386,28 @@ void MCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {
// MCSymbols when they have spaces in them.
OS << "\t.thumb_func";
// Only Mach-O hasSubsectionsViaSymbols()
- if (MAI->hasSubsectionsViaSymbols())
- OS << '\t' << *Func;
+ if (MAI->hasSubsectionsViaSymbols()) {
+ OS << '\t';
+ Func->print(OS, MAI);
+ }
void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
- OS << *Symbol << " = " << *Value;
+ Symbol->print(OS, MAI);
+ OS << " = ";
+ Value->print(OS, MAI);
MCStreamer::EmitAssignment(Symbol, Value);
void MCAsmStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
- OS << ".weakref " << *Alias << ", " << *Symbol;
+ OS << ".weakref ";
+ Alias->print(OS, MAI);
+ OS << ", ";
+ Symbol->print(OS, MAI);
@@ -413,8 +424,9 @@ bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_ELF_TypeGnuUniqueObject: /// .type _foo, @gnu_unique_object
if (!MAI->hasDotTypeDotSizeDirective())
return false; // Symbol attribute not supported
- OS << "\t.type\t" << *Symbol << ','
- << ((MAI->getCommentString()[0] != '@') ? '@' : '%');
+ OS << "\t.type\t";
+ Symbol->print(OS, MAI);
+ OS << ',' << ((MAI->getCommentString()[0] != '@') ? '@' : '%');
switch (Attribute) {
default: return false;
case MCSA_ELF_TypeFunction: OS << "function"; break;
@@ -455,19 +467,23 @@ bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break;
- OS << *Symbol;
+ Symbol->print(OS, MAI);
return true;
void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
- OS << ".desc" << ' ' << *Symbol << ',' << DescValue;
+ OS << ".desc" << ' ';
+ Symbol->print(OS, MAI);
+ OS << ',' << DescValue;
void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
- OS << "\t.def\t " << *Symbol << ';';
+ OS << "\t.def\t ";
+ Symbol->print(OS, MAI);
+ OS << ';';
@@ -486,19 +502,30 @@ void MCAsmStreamer::EndCOFFSymbolDef() {
+void MCAsmStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
+ OS << "\t.safeseh\t" << *Symbol;
+ EmitEOL();
void MCAsmStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
- OS << "\t.secidx\t" << *Symbol;
+ OS << "\t.secidx\t";
+ Symbol->print(OS, MAI);
void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
- OS << "\t.secrel32\t" << *Symbol;
+ OS << "\t.secrel32\t";
+ Symbol->print(OS, MAI);
-void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+void MCAsmStreamer::emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) {
- OS << "\t.size\t" << *Symbol << ", " << *Value << '\n';
+ OS << "\t.size\t";
+ Symbol->print(OS, MAI);
+ OS << ", ";
+ Value->print(OS, MAI);
+ OS << '\n';
void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -506,7 +533,10 @@ void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
// Common symbols do not belong to any actual section.
AssignSection(Symbol, nullptr);
- OS << "\t.comm\t" << *Symbol << ',' << Size;
+ OS << "\t.comm\t";
+ Symbol->print(OS, MAI);
+ OS << ',' << Size;
if (ByteAlignment != 0) {
if (MAI->getCOMMDirectiveAlignmentIsInBytes())
OS << ',' << ByteAlignment;
@@ -525,7 +555,10 @@ void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
// Common symbols do not belong to any actual section.
AssignSection(Symbol, nullptr);
- OS << "\t.lcomm\t" << *Symbol << ',' << Size;
+ OS << "\t.lcomm\t";
+ Symbol->print(OS, MAI);
+ OS << ',' << Size;
if (ByteAlign > 1) {
switch (MAI->getLCOMMDirectiveAlignmentType()) {
case LCOMM::NoAlignment:
@@ -555,7 +588,9 @@ void MCAsmStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
OS << MOSection->getSegmentName() << "," << MOSection->getSectionName();
if (Symbol) {
- OS << ',' << *Symbol << ',' << Size;
+ OS << ',';
+ Symbol->print(OS, MAI);
+ OS << ',' << Size;
if (ByteAlignment != 0)
OS << ',' << Log2_32(ByteAlignment);
@@ -572,7 +607,9 @@ void MCAsmStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
assert(Symbol && "Symbol shouldn't be NULL!");
// Instead of using the Section we'll just use the shortcut.
// This is a mach-o specific directive and section.
- OS << ".tbss " << *Symbol << ", " << Size;
+ OS << ".tbss ";
+ Symbol->print(OS, MAI);
+ OS << ", " << Size;
// Output align if we have it. We default to 1 so don't bother printing
// that.
@@ -643,7 +680,7 @@ void MCAsmStreamer::EmitBytes(StringRef Data) {
void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size) {
- EmitValue(MCConstantExpr::Create(Value, getContext()), Size);
+ EmitValue(MCConstantExpr::create(Value, getContext()), Size);
void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
@@ -662,7 +699,7 @@ void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
if (!Directive) {
int64_t IntValue;
- if (!Value->EvaluateAsAbsolute(IntValue))
+ if (!Value->evaluateAsAbsolute(IntValue))
report_fatal_error("Don't know how to emit this value.");
// We couldn't handle the requested integer size so we fallback by breaking
@@ -697,39 +734,44 @@ void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
assert(Directive && "Invalid size for machine code value!");
- OS << Directive << *Value;
+ OS << Directive;
+ Value->print(OS, MAI);
void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
int64_t IntValue;
- if (Value->EvaluateAsAbsolute(IntValue)) {
+ if (Value->evaluateAsAbsolute(IntValue)) {
- OS << ".uleb128 " << *Value;
+ OS << ".uleb128 ";
+ Value->print(OS, MAI);
void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
int64_t IntValue;
- if (Value->EvaluateAsAbsolute(IntValue)) {
+ if (Value->evaluateAsAbsolute(IntValue)) {
- OS << ".sleb128 " << *Value;
+ OS << ".sleb128 ";
+ Value->print(OS, MAI);
void MCAsmStreamer::EmitGPRel64Value(const MCExpr *Value) {
assert(MAI->getGPRel64Directive() != nullptr);
- OS << MAI->getGPRel64Directive() << *Value;
+ OS << MAI->getGPRel64Directive();
+ Value->print(OS, MAI);
void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
assert(MAI->getGPRel32Directive() != nullptr);
- OS << MAI->getGPRel32Directive() << *Value;
+ OS << MAI->getGPRel32Directive();
+ Value->print(OS, MAI);
@@ -816,7 +858,9 @@ void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
bool MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
// FIXME: Verify that Offset is associated with the current section.
- OS << ".org " << *Offset << ", " << (unsigned) Value;
+ OS << ".org ";
+ Offset->print(OS, MAI);
+ OS << ", " << (unsigned)Value;
return false;
@@ -987,13 +1031,15 @@ void MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
void MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym,
unsigned Encoding) {
MCStreamer::EmitCFIPersonality(Sym, Encoding);
- OS << "\t.cfi_personality " << Encoding << ", " << *Sym;
+ OS << "\t.cfi_personality " << Encoding << ", ";
+ Sym->print(OS, MAI);
void MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
MCStreamer::EmitCFILsda(Sym, Encoding);
- OS << "\t.cfi_lsda " << Encoding << ", " << *Sym;
+ OS << "\t.cfi_lsda " << Encoding << ", ";
+ Sym->print(OS, MAI);
@@ -1057,7 +1103,8 @@ void MCAsmStreamer::EmitCFIWindowSave() {
void MCAsmStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol) {
- OS << ".seh_proc " << *Symbol;
+ OS << ".seh_proc ";
+ Symbol->print(OS, MAI);
@@ -1086,7 +1133,8 @@ void MCAsmStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind,
bool Except) {
MCStreamer::EmitWinEHHandler(Sym, Unwind, Except);
- OS << "\t.seh_handler " << *Sym;
+ OS << "\t.seh_handler ";
+ Sym->print(OS, MAI);
if (Unwind)
OS << ", @unwind";
if (Except)
@@ -1102,9 +1150,9 @@ void MCAsmStreamer::EmitWinEHHandlerData() {
// We only do this so the section switch that terminates the handler
// data block is visible.
WinEH::FrameInfo *CurFrame = getCurrentWinFrameInfo();
- if (MCSection *XData = WinEH::UnwindEmitter::getXDataSection(
- CurFrame->Function, getContext()))
- SwitchSectionNoChange(XData);
+ MCSection *XData =
+ WinEH::UnwindEmitter::getXDataSection(CurFrame->Function, getContext());
+ SwitchSectionNoChange(XData);
OS << "\t.seh_handlerdata";
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 868b0f1..55f5009 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -120,14 +120,13 @@ uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
// Simple getSymbolOffset helper for the non-variable case.
static bool getLabelOffset(const MCAsmLayout &Layout, const MCSymbol &S,
bool ReportError, uint64_t &Val) {
- const MCSymbolData &SD = S.getData();
- if (!SD.getFragment()) {
+ if (!S.getFragment()) {
if (ReportError)
report_fatal_error("unable to evaluate offset to undefined symbol '" +
S.getName() + "'");
return false;
- Val = Layout.getFragmentOffset(SD.getFragment()) + SD.getOffset();
+ Val = Layout.getFragmentOffset(S.getFragment()) + S.getOffset();
return true;
@@ -138,7 +137,7 @@ static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, const MCSymbol &S,
// If SD is a variable, evaluate it.
MCValue Target;
- if (!S.getVariableValue()->EvaluateAsRelocatable(Target, &Layout, nullptr))
+ if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr))
report_fatal_error("unable to evaluate offset for variable '" +
S.getName() + "'");
@@ -195,8 +194,7 @@ const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const {
const MCSymbol &ASym = A->getSymbol();
const MCAssembler &Asm = getAssembler();
- const MCSymbolData &ASD = Asm.getSymbolData(ASym);
- if (ASD.isCommon()) {
+ if (ASym.isCommon()) {
// FIXME: we should probably add a SMLoc to MCExpr.
"Common symbol " + ASym.getName() +
@@ -378,17 +376,17 @@ const MCSymbol *MCAssembler::getAtom(const MCSymbol &S) const {
return &S;
// Absolute and undefined symbols have no defining atom.
- if (!S.getData().getFragment())
+ if (!S.getFragment())
return nullptr;
// Non-linker visible symbols in sections which can't be atomized have no
// defining atom.
if (!getContext().getAsmInfo()->isSectionAtomizableBySymbols(
- *S.getData().getFragment()->getParent()))
+ *S.getFragment()->getParent()))
return nullptr;
// Otherwise, return the atom for the containing fragment.
- return S.getData().getFragment()->getAtom();
+ return S.getFragment()->getAtom();
bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
@@ -396,11 +394,11 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
MCValue &Target, uint64_t &Value) const {
- // FIXME: This code has some duplication with RecordRelocation. We should
+ // FIXME: This code has some duplication with recordRelocation. We should
// probably merge the two into a single callback that tries to evaluate a
// fixup and records a relocation if one is needed.
const MCExpr *Expr = Fixup.getValue();
- if (!Expr->EvaluateAsRelocatable(Target, &Layout, &Fixup))
+ if (!Expr->evaluateAsRelocatable(Target, &Layout, &Fixup))
getContext().reportFatalError(Fixup.getLoc(), "expected relocatable expression");
bool IsPCRel = Backend.getFixupKindInfo(
@@ -418,7 +416,7 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
if (A->getKind() != MCSymbolRefExpr::VK_None || SA.isUndefined()) {
IsResolved = false;
} else {
- IsResolved = getWriter().IsSymbolRefDifferenceFullyResolvedImpl(
+ IsResolved = getWriter().isSymbolRefDifferenceFullyResolvedImpl(
*this, SA, *DF, false, true);
@@ -475,6 +473,9 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
case MCFragment::FT_LEB:
return cast<MCLEBFragment>(F).getContents().size();
+ case MCFragment::FT_SafeSEH:
+ return 4;
case MCFragment::FT_Align: {
const MCAlignFragment &AF = cast<MCAlignFragment>(F);
unsigned Offset = Layout.getFragmentOffset(&AF);
@@ -493,7 +494,7 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
case MCFragment::FT_Org: {
const MCOrgFragment &OF = cast<MCOrgFragment>(F);
int64_t TargetLocation;
- if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, Layout))
+ if (!OF.getOffset().evaluateAsAbsolute(TargetLocation, Layout))
report_fatal_error("expected assembly-time absolute expression");
// FIXME: We need a way to communicate this error.
@@ -575,7 +576,17 @@ void MCAsmLayout::layoutFragment(MCFragment *F) {
/// a MCEncodedFragment.
static void writeFragmentContents(const MCFragment &F, MCObjectWriter *OW) {
const MCEncodedFragment &EF = cast<MCEncodedFragment>(F);
- OW->WriteBytes(EF.getContents());
+ OW->writeBytes(EF.getContents());
+void MCAssembler::registerSymbol(const MCSymbol &Symbol, bool *Created) {
+ bool New = !Symbol.isRegistered();
+ if (Created)
+ *Created = New;
+ if (New) {
+ Symbol.setIsRegistered(true);
+ Symbols.push_back(&Symbol);
+ }
void MCAssembler::writeFragmentPadding(const MCFragment &F, uint64_t FSize,
@@ -659,10 +670,10 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
for (uint64_t i = 0; i != Count; ++i) {
switch (AF.getValueSize()) {
default: llvm_unreachable("Invalid size!");
- case 1: OW->Write8 (uint8_t (AF.getValue())); break;
- case 2: OW->Write16(uint16_t(AF.getValue())); break;
- case 4: OW->Write32(uint32_t(AF.getValue())); break;
- case 8: OW->Write64(uint64_t(AF.getValue())); break;
+ case 1: OW->write8 (uint8_t (AF.getValue())); break;
+ case 2: OW->write16(uint16_t(AF.getValue())); break;
+ case 4: OW->write32(uint32_t(AF.getValue())); break;
+ case 8: OW->write64(uint64_t(AF.getValue())); break;
@@ -692,10 +703,10 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) {
switch (FF.getValueSize()) {
default: llvm_unreachable("Invalid size!");
- case 1: OW->Write8 (uint8_t (FF.getValue())); break;
- case 2: OW->Write16(uint16_t(FF.getValue())); break;
- case 4: OW->Write32(uint32_t(FF.getValue())); break;
- case 8: OW->Write64(uint64_t(FF.getValue())); break;
+ case 1: OW->write8 (uint8_t (FF.getValue())); break;
+ case 2: OW->write16(uint16_t(FF.getValue())); break;
+ case 4: OW->write32(uint32_t(FF.getValue())); break;
+ case 8: OW->write64(uint64_t(FF.getValue())); break;
@@ -703,7 +714,13 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
case MCFragment::FT_LEB: {
const MCLEBFragment &LF = cast<MCLEBFragment>(F);
- OW->WriteBytes(LF.getContents());
+ OW->writeBytes(LF.getContents());
+ break;
+ }
+ case MCFragment::FT_SafeSEH: {
+ const MCSafeSEHFragment &SF = cast<MCSafeSEHFragment>(F);
+ OW->write32(SF.getSymbol()->getIndex());
@@ -712,19 +729,19 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCOrgFragment &OF = cast<MCOrgFragment>(F);
for (uint64_t i = 0, e = FragmentSize; i != e; ++i)
- OW->Write8(uint8_t(OF.getValue()));
+ OW->write8(uint8_t(OF.getValue()));
case MCFragment::FT_Dwarf: {
const MCDwarfLineAddrFragment &OF = cast<MCDwarfLineAddrFragment>(F);
- OW->WriteBytes(OF.getContents());
+ OW->writeBytes(OF.getContents());
case MCFragment::FT_DwarfFrame: {
const MCDwarfCallFrameFragment &CF = cast<MCDwarfCallFrameFragment>(F);
- OW->WriteBytes(CF.getContents());
+ OW->writeBytes(CF.getContents());
@@ -802,7 +819,7 @@ std::pair<uint64_t, bool> MCAssembler::handleFixup(const MCAsmLayout &Layout,
// The fixup was unresolved, we need a relocation. Inform the object
// writer of the relocation, and give it an opportunity to adjust the
// fixup value if need be.
- getWriter().RecordRelocation(*this, Layout, &F, Fixup, Target, IsPCRel,
+ getWriter().recordRelocation(*this, Layout, &F, Fixup, Target, IsPCRel,
return std::make_pair(FixedValue, IsPCRel);
@@ -857,7 +874,7 @@ void MCAssembler::Finish() {
// Allow the object writer a chance to perform post-layout binding (for
// example, to set the index fields in the symbol data).
- getWriter().ExecutePostLayoutBinding(*this, Layout);
+ getWriter().executePostLayoutBinding(*this, Layout);
// Evaluate and apply the fixups, generating relocation entries as necessary.
for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
@@ -880,7 +897,7 @@ void MCAssembler::Finish() {
// Write the object file.
- getWriter().WriteObject(*this, Layout);
+ getWriter().writeObject(*this, Layout);
stats::ObjectBytes += OS.tell() - StartOffset;
@@ -888,13 +905,11 @@ void MCAssembler::Finish() {
bool MCAssembler::fixupNeedsRelaxation(const MCFixup &Fixup,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
- // If we cannot resolve the fixup value, it requires relaxation.
MCValue Target;
uint64_t Value;
- if (!evaluateFixup(Layout, Fixup, DF, Target, Value))
- return true;
- return getBackend().fixupNeedsRelaxation(Fixup, Value, DF, Layout);
+ bool Resolved = evaluateFixup(Layout, Fixup, DF, Target, Value);
+ return getBackend().fixupNeedsRelaxationAdvanced(Fixup, Resolved, Value, DF,
+ Layout);
bool MCAssembler::fragmentNeedsRelaxation(const MCRelaxableFragment *F,
@@ -1088,6 +1103,7 @@ void MCFragment::dump() {
case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break;
case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break;
case MCFragment::FT_LEB: OS << "MCLEBFragment"; break;
+ case MCFragment::FT_SafeSEH: OS << "MCSafeSEHFragment"; break;
OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder
@@ -1180,25 +1196,13 @@ void MCFragment::dump() {
OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned();
+ case MCFragment::FT_SafeSEH: {
+ const MCSafeSEHFragment *F = cast<MCSafeSEHFragment>(this);
+ OS << "\n ";
+ OS << " Sym:" << F->getSymbol();
+ break;
+ }
- OS << ">";
-void MCSymbolData::dump() const {
- raw_ostream &OS = llvm::errs();
- OS << "<MCSymbolData"
- << " Fragment:" << getFragment();
- if (!isCommon())
- OS << " Offset:" << getOffset();
- OS << " Flags:" << getFlags();
- if (isCommon())
- OS << " (common, size:" << getCommonSize()
- << " align: " << getCommonAlignment() << ")";
- if (isExternal())
- OS << " (external)";
- if (isPrivateExtern())
- OS << " (private extern)";
OS << ">";
@@ -1219,7 +1223,6 @@ void MCAssembler::dump() {
OS << "(";
OS << ", Index:" << it->getIndex() << ", ";
- it->getData().dump();
OS << ")";
OS << "]>\n";
@@ -1236,5 +1239,6 @@ void MCAlignFragment::anchor() { }
void MCFillFragment::anchor() { }
void MCOrgFragment::anchor() { }
void MCLEBFragment::anchor() { }
+void MCSafeSEHFragment::anchor() { }
void MCDwarfLineAddrFragment::anchor() { }
void MCDwarfCallFrameFragment::anchor() { }
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 1f2f034..1e52eed 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -20,7 +20,9 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolCOFF.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSymbolMachO.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -114,13 +116,13 @@ MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
MCSymbol *&Sym = Symbols[NameRef];
if (!Sym)
- Sym = CreateSymbol(NameRef, false);
+ Sym = createSymbol(NameRef, false, false);
return Sym;
-MCSymbol *MCContext::getOrCreateSectionSymbol(const MCSectionELF &Section) {
- MCSymbol *&Sym = SectionSymbols[&Section];
+MCSymbolELF *MCContext::getOrCreateSectionSymbol(const MCSectionELF &Section) {
+ MCSymbolELF *&Sym = SectionSymbols[&Section];
if (Sym)
return Sym;
@@ -128,12 +130,12 @@ MCSymbol *MCContext::getOrCreateSectionSymbol(const MCSectionELF &Section) {
MCSymbol *&OldSym = Symbols[Name];
if (OldSym && OldSym->isUndefined()) {
- Sym = OldSym;
- return OldSym;
+ Sym = cast<MCSymbolELF>(OldSym);
+ return Sym;
auto NameIter = UsedNames.insert(std::make_pair(Name, true)).first;
- Sym = new (*this) MCSymbol(&*NameIter, /*isTemporary*/ false);
+ Sym = new (*this) MCSymbolELF(&*NameIter, /*isTemporary*/ false);
if (!OldSym)
OldSym = Sym;
@@ -157,15 +159,32 @@ MCSymbol *MCContext::getOrCreateLSDASymbol(StringRef FuncName) {
-MCSymbol *MCContext::CreateSymbol(StringRef Name, bool AlwaysAddSuffix) {
- // Determine whether this is an assembler temporary or normal label, if used.
- bool IsTemporary = false;
+MCSymbol *MCContext::createSymbolImpl(const StringMapEntry<bool> *Name,
+ bool IsTemporary) {
+ if (MOFI) {
+ switch (MOFI->getObjectFileType()) {
+ case MCObjectFileInfo::IsCOFF:
+ return new (*this) MCSymbolCOFF(Name, IsTemporary);
+ case MCObjectFileInfo::IsELF:
+ return new (*this) MCSymbolELF(Name, IsTemporary);
+ case MCObjectFileInfo::IsMachO:
+ return new (*this) MCSymbolMachO(Name, IsTemporary);
+ }
+ }
+ return new (*this) MCSymbol(MCSymbol::SymbolKindUnset, Name, IsTemporary);
+MCSymbol *MCContext::createSymbol(StringRef Name, bool AlwaysAddSuffix,
+ bool IsTemporary) {
+ if (IsTemporary && !UseNamesOnTempLabels)
+ return createSymbolImpl(nullptr, true);
+ // Determine whether this is a user-written assembler temporary or normal
+ // label, if used.
+ IsTemporary = false;
if (AllowTemporaryLabels)
IsTemporary = Name.startswith(MAI->getPrivateGlobalPrefix());
- if (IsTemporary && AlwaysAddSuffix && !UseNamesOnTempLabels)
- return new (*this) MCSymbol(nullptr, true);
SmallString<128> NewName = Name;
bool AddSuffix = AlwaysAddSuffix;
unsigned &NextUniqueID = NextID[Name];
@@ -178,8 +197,7 @@ MCSymbol *MCContext::CreateSymbol(StringRef Name, bool AlwaysAddSuffix) {
if (NameEntry.second) {
// Ok, we found a name. Have the MCSymbol object itself refer to the copy
// of the string that is embedded in the UsedNames entry.
- MCSymbol *Result = new (*this) MCSymbol(&*NameEntry.first, IsTemporary);
- return Result;
+ return createSymbolImpl(&*NameEntry.first, IsTemporary);
assert(IsTemporary && "Cannot rename non-temporary symbols");
AddSuffix = true;
@@ -190,13 +208,13 @@ MCSymbol *MCContext::CreateSymbol(StringRef Name, bool AlwaysAddSuffix) {
MCSymbol *MCContext::createTempSymbol(const Twine &Name, bool AlwaysAddSuffix) {
SmallString<128> NameSV;
raw_svector_ostream(NameSV) << MAI->getPrivateGlobalPrefix() << Name;
- return CreateSymbol(NameSV, AlwaysAddSuffix);
+ return createSymbol(NameSV, AlwaysAddSuffix, true);
MCSymbol *MCContext::createLinkerPrivateTempSymbol() {
SmallString<128> NameSV;
raw_svector_ostream(NameSV) << MAI->getLinkerPrivateGlobalPrefix() << "tmp";
- return CreateSymbol(NameSV, true);
+ return createSymbol(NameSV, true, false);
MCSymbol *MCContext::createTempSymbol() {
@@ -295,7 +313,7 @@ void MCContext::renameELFSection(MCSectionELF *Section, StringRef Name) {
MCSectionELF *MCContext::createELFRelSection(StringRef Name, unsigned Type,
unsigned Flags, unsigned EntrySize,
- const MCSymbol *Group,
+ const MCSymbolELF *Group,
const MCSectionELF *Associated) {
StringMap<bool>::iterator I;
bool Inserted;
@@ -310,9 +328,9 @@ MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
unsigned Flags, unsigned EntrySize,
StringRef Group, unsigned UniqueID,
const char *BeginSymName) {
- MCSymbol *GroupSym = nullptr;
+ MCSymbolELF *GroupSym = nullptr;
if (!Group.empty())
- GroupSym = getOrCreateSymbol(Group);
+ GroupSym = cast<MCSymbolELF>(getOrCreateSymbol(Group));
return getELFSection(Section, Type, Flags, EntrySize, GroupSym, UniqueID,
BeginSymName, nullptr);
@@ -320,7 +338,7 @@ MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
unsigned Flags, unsigned EntrySize,
- const MCSymbol *GroupSym,
+ const MCSymbolELF *GroupSym,
unsigned UniqueID,
const char *BeginSymName,
const MCSectionELF *Associated) {
@@ -353,7 +371,7 @@ MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
return Result;
-MCSectionELF *MCContext::createELFGroupSection(const MCSymbol *Group) {
+MCSectionELF *MCContext::createELFGroupSection(const MCSymbolELF *Group) {
MCSectionELF *Result = new (*this)
MCSectionELF(".group", ELF::SHT_GROUP, 0, SectionKind::getReadOnly(), 4,
Group, ~0, nullptr, nullptr);
@@ -447,13 +465,8 @@ bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
/// Remove empty sections from SectionStartEndSyms, to avoid generating
/// useless debug info for them.
void MCContext::finalizeDwarfSections(MCStreamer &MCOS) {
- std::vector<MCSection *> Keep;
- for (MCSection *Sec : SectionsForRanges) {
- if (MCOS.mayHaveInstructions(*Sec))
- Keep.push_back(Sec);
- }
- SectionsForRanges.clear();
- SectionsForRanges.insert(Keep.begin(), Keep.end());
+ SectionsForRanges.remove_if(
+ [&](MCSection *Sec) { return !MCOS.mayHaveInstructions(*Sec); });
void MCContext::reportFatalError(SMLoc Loc, const Twine &Msg) const {
diff --git a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
index 1262e2a..68948d3 100644
--- a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
+++ b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
@@ -88,9 +88,9 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
if (SymbolicOp.AddSymbol.Name) {
StringRef Name(SymbolicOp.AddSymbol.Name);
MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
- Add = MCSymbolRefExpr::Create(Sym, Ctx);
+ Add = MCSymbolRefExpr::create(Sym, Ctx);
} else {
- Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, Ctx);
+ Add = MCConstantExpr::create((int)SymbolicOp.AddSymbol.Value, Ctx);
@@ -99,37 +99,37 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
if (SymbolicOp.SubtractSymbol.Name) {
StringRef Name(SymbolicOp.SubtractSymbol.Name);
MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
- Sub = MCSymbolRefExpr::Create(Sym, Ctx);
+ Sub = MCSymbolRefExpr::create(Sym, Ctx);
} else {
- Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, Ctx);
+ Sub = MCConstantExpr::create((int)SymbolicOp.SubtractSymbol.Value, Ctx);
const MCExpr *Off = nullptr;
if (SymbolicOp.Value != 0)
- Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx);
+ Off = MCConstantExpr::create(SymbolicOp.Value, Ctx);
const MCExpr *Expr;
if (Sub) {
const MCExpr *LHS;
if (Add)
- LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx);
+ LHS = MCBinaryExpr::createSub(Add, Sub, Ctx);
- LHS = MCUnaryExpr::CreateMinus(Sub, Ctx);
+ LHS = MCUnaryExpr::createMinus(Sub, Ctx);
if (Off)
- Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx);
+ Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx);
Expr = LHS;
} else if (Add) {
if (Off)
- Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx);
+ Expr = MCBinaryExpr::createAdd(Add, Off, Ctx);
Expr = Add;
} else {
if (Off)
Expr = Off;
- Expr = MCConstantExpr::Create(0, Ctx);
+ Expr = MCConstantExpr::create(0, Ctx);
Expr = RelInfo->createExprForCAPIVariantKind(Expr, SymbolicOp.VariantKind);
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index a7e83f6..90f96e2 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -98,15 +98,15 @@ static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS,
int IntVal) {
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
const MCExpr *Res =
- MCSymbolRefExpr::Create(&End, Variant, MCOS.getContext());
+ MCSymbolRefExpr::create(&End, Variant, MCOS.getContext());
const MCExpr *RHS =
- MCSymbolRefExpr::Create(&Start, Variant, MCOS.getContext());
+ MCSymbolRefExpr::create(&Start, Variant, MCOS.getContext());
const MCExpr *Res1 =
- MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS, MCOS.getContext());
+ MCBinaryExpr::create(MCBinaryExpr::Sub, Res, RHS, MCOS.getContext());
const MCExpr *Res2 =
- MCConstantExpr::Create(IntVal, MCOS.getContext());
+ MCConstantExpr::create(IntVal, MCOS.getContext());
const MCExpr *Res3 =
- MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, MCOS.getContext());
+ MCBinaryExpr::create(MCBinaryExpr::Sub, Res1, Res2, MCOS.getContext());
return Res3;
@@ -247,7 +247,7 @@ static const MCExpr *forceExpAbs(MCStreamer &OS, const MCExpr* Expr) {
MCSymbol *ABS = Context.createTempSymbol();
OS.EmitAssignment(ABS, Expr);
- return MCSymbolRefExpr::Create(ABS, Context);
+ return MCSymbolRefExpr::create(ABS, Context);
static void emitAbsValue(MCStreamer &OS, const MCExpr *Value, unsigned Size) {
@@ -616,7 +616,7 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS,
assert(StartSymbol && "StartSymbol must not be NULL");
assert(EndSymbol && "EndSymbol must not be NULL");
- const MCExpr *Addr = MCSymbolRefExpr::Create(
+ const MCExpr *Addr = MCSymbolRefExpr::create(
StartSymbol, MCSymbolRefExpr::VK_None, context);
const MCExpr *Size = MakeStartMinusEndExpr(*MCOS,
*StartSymbol, *EndSymbol, 0);
@@ -705,12 +705,12 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
assert(EndSymbol && "EndSymbol must not be NULL");
// AT_low_pc, the first address of the default .text section.
- const MCExpr *Start = MCSymbolRefExpr::Create(
+ const MCExpr *Start = MCSymbolRefExpr::create(
StartSymbol, MCSymbolRefExpr::VK_None, context);
MCOS->EmitValue(Start, AddrSize);
// AT_high_pc, the last address of the default .text section.
- const MCExpr *End = MCSymbolRefExpr::Create(
+ const MCExpr *End = MCSymbolRefExpr::create(
EndSymbol, MCSymbolRefExpr::VK_None, context);
MCOS->EmitValue(End, AddrSize);
@@ -772,7 +772,7 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
MCOS->EmitIntValue(Entry.getLineNumber(), 4);
// AT_low_pc, start address of the label.
- const MCExpr *AT_low_pc = MCSymbolRefExpr::Create(Entry.getLabel(),
+ const MCExpr *AT_low_pc = MCSymbolRefExpr::create(Entry.getLabel(),
MCSymbolRefExpr::VK_None, context);
MCOS->EmitValue(AT_low_pc, AddrSize);
@@ -812,7 +812,7 @@ static void EmitGenDwarfRanges(MCStreamer *MCOS) {
assert(EndSymbol && "EndSymbol must not be NULL");
// Emit a base address selection entry for the start of this section
- const MCExpr *SectionStartAddr = MCSymbolRefExpr::Create(
+ const MCExpr *SectionStartAddr = MCSymbolRefExpr::create(
StartSymbol, MCSymbolRefExpr::VK_None, context);
MCOS->EmitFill(AddrSize, 0xFF);
MCOS->EmitValue(SectionStartAddr, AddrSize);
diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp
deleted file mode 100644
index 3690634..0000000
--- a/lib/MC/MCELF.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-//===- lib/MC/MCELF.cpp - MC ELF ------------------------------------------===//
-// The LLVM Compiler Infrastructure
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// This file implements ELF object file writer information.
-#include "llvm/MC/MCELF.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCELFSymbolFlags.h"
-#include "llvm/MC/MCFixupKindInfo.h"
-#include "llvm/Support/ELF.h"
-namespace llvm {
-void MCELF::SetBinding(MCSymbolData &SD, unsigned Binding) {
- assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
- Binding == ELF::STB_WEAK || Binding == ELF::STB_GNU_UNIQUE);
- uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift);
- SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift));
-unsigned MCELF::GetBinding(const MCSymbolData &SD) {
- uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift;
- assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
- Binding == ELF::STB_WEAK || Binding == ELF::STB_GNU_UNIQUE);
- return Binding;
-void MCELF::SetType(MCSymbolData &SD, unsigned Type) {
- assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
- Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
- Type == ELF::STT_COMMON || Type == ELF::STT_TLS ||
- Type == ELF::STT_GNU_IFUNC);
- uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift);
- SD.setFlags(OtherFlags | (Type << ELF_STT_Shift));
-unsigned MCELF::GetType(const MCSymbolData &SD) {
- uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift;
- assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
- Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
- Type == ELF::STT_COMMON || Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC);
- return Type;
-// Visibility is stored in the first two bits of st_other
-// st_other values are stored in the second byte of get/setFlags
-void MCELF::SetVisibility(MCSymbolData &SD, unsigned Visibility) {
- assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
- Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
- uint32_t OtherFlags = SD.getFlags() & ~(0x3 << ELF_STV_Shift);
- SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift));
-unsigned MCELF::GetVisibility(const MCSymbolData &SD) {
- unsigned Visibility =
- (SD.getFlags() & (0x3 << ELF_STV_Shift)) >> ELF_STV_Shift;
- assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
- Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
- return Visibility;
-// Other is stored in the last six bits of st_other
-// st_other values are stored in the second byte of get/setFlags
-void MCELF::setOther(MCSymbolData &SD, unsigned Other) {
- uint32_t OtherFlags = SD.getFlags() & ~(0x3f << ELF_STO_Shift);
- SD.setFlags(OtherFlags | (Other << ELF_STO_Shift));
-unsigned MCELF::getOther(const MCSymbolData &SD) {
- unsigned Other =
- (SD.getFlags() & (0x3f << ELF_STO_Shift)) >> ELF_STO_Shift;
- return Other;
diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp
index dc3d6c3..bc0ba85 100644
--- a/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/lib/MC/MCELFObjectTargetWriter.cpp
@@ -24,7 +24,7 @@ MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_,
-bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
+bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const {
return false;
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 653a1d2..e0f4a2a 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -20,14 +20,13 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCELF.h"
-#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
@@ -39,7 +38,7 @@
using namespace llvm;
bool MCELFStreamer::isBundleLocked() const {
- return getCurrentSectionData()->isBundleLocked();
+ return getCurrentSectionOnly()->isBundleLocked();
MCELFStreamer::~MCELFStreamer() {
@@ -106,16 +105,16 @@ void MCELFStreamer::InitSections(bool NoExecStack) {
-void MCELFStreamer::EmitLabel(MCSymbol *Symbol) {
+void MCELFStreamer::EmitLabel(MCSymbol *S) {
+ auto *Symbol = cast<MCSymbolELF>(S);
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
const MCSectionELF &Section =
static_cast<const MCSectionELF&>(Symbol->getSection());
- MCSymbolData &SD = getAssembler().getSymbolData(*Symbol);
if (Section.getFlags() & ELF::SHF_TLS)
+ Symbol->setType(ELF::STT_TLS);
void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
@@ -146,7 +145,7 @@ static void setSectionAlignmentForBundling(const MCAssembler &Assembler,
void MCELFStreamer::ChangeSection(MCSection *Section,
const MCExpr *Subsection) {
- MCSection *CurSection = getCurrentSectionData();
+ MCSection *CurSection = getCurrentSectionOnly();
if (CurSection && isBundleLocked())
report_fatal_error("Unterminated .bundle_lock when changing a section");
@@ -156,19 +155,24 @@ void MCELFStreamer::ChangeSection(MCSection *Section,
auto *SectionELF = static_cast<const MCSectionELF *>(Section);
const MCSymbol *Grp = SectionELF->getGroup();
if (Grp)
- Asm.getOrCreateSymbolData(*Grp);
+ Asm.registerSymbol(*Grp);
this->MCObjectStreamer::ChangeSection(Section, Subsection);
- MCSymbol *SectionSymbol = getContext().getOrCreateSectionSymbol(*SectionELF);
- if (SectionSymbol->isUndefined()) {
- EmitLabel(SectionSymbol);
- MCELF::SetType(Asm.getSymbolData(*SectionSymbol), ELF::STT_SECTION);
+ MCContext &Ctx = getContext();
+ auto *Begin = cast_or_null<MCSymbolELF>(Section->getBeginSymbol());
+ if (!Begin) {
+ Begin = Ctx.getOrCreateSectionSymbol(*SectionELF);
+ Section->setBeginSymbol(Begin);
+ }
+ if (Begin->isUndefined()) {
+ Asm.registerSymbol(*Begin);
+ Begin->setType(ELF::STT_SECTION);
void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
- getAssembler().getOrCreateSymbolData(*Symbol);
- const MCExpr *Value = MCSymbolRefExpr::Create(
+ getAssembler().registerSymbol(*Symbol);
+ const MCExpr *Value = MCSymbolRefExpr::create(
Symbol, MCSymbolRefExpr::VK_WEAKREF, getContext());
@@ -192,8 +196,8 @@ static unsigned CombineSymbolTypes(unsigned T1, unsigned T2) {
return T2;
-bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
- MCSymbolAttr Attribute) {
+bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
+ auto *Symbol = cast<MCSymbolELF>(S);
// Indirect symbols are handled differently, to match how 'as' handles
// them. This makes writing matching .o files easier.
if (Attribute == MCSA_IndirectSymbol) {
@@ -201,15 +205,15 @@ bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
// important for matching the string table that 'as' generates.
IndirectSymbolData ISD;
ISD.Symbol = Symbol;
- ISD.Section = getCurrentSectionData();
+ ISD.Section = getCurrentSectionOnly();
return true;
// Adding a symbol attribute always introduces the symbol, note that an
- // important side effect of calling getOrCreateSymbolData here is to register
+ // important side effect of calling registerSymbol here is to register
// the symbol with the assembler.
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ getAssembler().registerSymbol(*Symbol);
// The implementation of symbol attributes is designed to match 'as', but it
// leaves much to desired. It doesn't really make sense to arbitrarily add and
@@ -233,90 +237,81 @@ bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_ELF_TypeGnuUniqueObject:
- MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), ELF::STT_OBJECT));
- SD.setExternal(true);
- BindingExplicitlySet.insert(Symbol);
+ Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_OBJECT));
+ Symbol->setBinding(ELF::STB_GNU_UNIQUE);
+ Symbol->setExternal(true);
case MCSA_Global:
- SD.setExternal(true);
- BindingExplicitlySet.insert(Symbol);
+ Symbol->setBinding(ELF::STB_GLOBAL);
+ Symbol->setExternal(true);
case MCSA_WeakReference:
case MCSA_Weak:
- MCELF::SetBinding(SD, ELF::STB_WEAK);
- SD.setExternal(true);
- BindingExplicitlySet.insert(Symbol);
+ Symbol->setBinding(ELF::STB_WEAK);
+ Symbol->setExternal(true);
case MCSA_Local:
- MCELF::SetBinding(SD, ELF::STB_LOCAL);
- SD.setExternal(false);
- BindingExplicitlySet.insert(Symbol);
+ Symbol->setBinding(ELF::STB_LOCAL);
+ Symbol->setExternal(false);
case MCSA_ELF_TypeFunction:
- MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+ Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_FUNC));
case MCSA_ELF_TypeIndFunction:
- MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+ Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_GNU_IFUNC));
case MCSA_ELF_TypeObject:
- MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+ Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_OBJECT));
case MCSA_ELF_TypeTLS:
- MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+ Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_TLS));
case MCSA_ELF_TypeCommon:
// TODO: Emit these as a common symbol.
- MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+ Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_OBJECT));
case MCSA_ELF_TypeNoType:
- MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+ Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_NOTYPE));
case MCSA_Protected:
+ Symbol->setVisibility(ELF::STV_PROTECTED);
case MCSA_Hidden:
- MCELF::SetVisibility(SD, ELF::STV_HIDDEN);
+ Symbol->setVisibility(ELF::STV_HIDDEN);
case MCSA_Internal:
- MCELF::SetVisibility(SD, ELF::STV_INTERNAL);
+ Symbol->setVisibility(ELF::STV_INTERNAL);
return true;
-void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) {
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+void MCELFStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size,
+ unsigned ByteAlignment) {
+ auto *Symbol = cast<MCSymbolELF>(S);
+ getAssembler().registerSymbol(*Symbol);
- if (!BindingExplicitlySet.count(Symbol)) {
- SD.setExternal(true);
+ if (!Symbol->isBindingSet()) {
+ Symbol->setBinding(ELF::STB_GLOBAL);
+ Symbol->setExternal(true);
+ Symbol->setType(ELF::STT_OBJECT);
- if (MCELF::GetBinding(SD) == ELF_STB_Local) {
+ if (Symbol->getBinding() == ELF::STB_LOCAL) {
MCSection *Section = getAssembler().getContext().getELFSection(
@@ -325,24 +320,26 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
struct LocalCommon L = {Symbol, Size, ByteAlignment};
} else {
- SD.setCommon(Size, ByteAlignment);
+ if(Symbol->declareCommon(Size, ByteAlignment))
+ report_fatal_error("Symbol: " + Symbol->getName() +
+ " redeclared as different type");
- SD.setSize(MCConstantExpr::Create(Size, getContext()));
+ cast<MCSymbolELF>(Symbol)
+ ->setSize(MCConstantExpr::create(Size, getContext()));
-void MCELFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- SD.setSize(Value);
+void MCELFStreamer::emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) {
+ Symbol->setSize(Value);
-void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
unsigned ByteAlignment) {
+ auto *Symbol = cast<MCSymbolELF>(S);
// FIXME: Should this be caught and done earlier?
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- MCELF::SetBinding(SD, ELF::STB_LOCAL);
- SD.setExternal(false);
- BindingExplicitlySet.insert(Symbol);
+ getAssembler().registerSymbol(*Symbol);
+ Symbol->setBinding(ELF::STB_LOCAL);
+ Symbol->setExternal(false);
EmitCommonSymbol(Symbol, Size, ByteAlignment);
@@ -456,8 +453,8 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
case MCSymbolRefExpr::VK_PPC_TLSLD:
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol());
+ getAssembler().registerSymbol(symRef.getSymbol());
+ cast<MCSymbolELF>(symRef.getSymbol()).setType(ELF::STT_TLS);
@@ -506,7 +503,7 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
MCDataFragment *DF;
if (Assembler.isBundlingEnabled()) {
- MCSection &Sec = *getCurrentSectionData();
+ MCSection &Sec = *getCurrentSectionOnly();
if (Assembler.getRelaxAll() && isBundleLocked())
// If the -mc-relax-all flag is used and we are bundle-locked, we re-use
// the current bundle group.
@@ -574,7 +571,7 @@ void MCELFStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
void MCELFStreamer::EmitBundleLock(bool AlignToEnd) {
- MCSection &Sec = *getCurrentSectionData();
+ MCSection &Sec = *getCurrentSectionOnly();
// Sanity checks
@@ -595,7 +592,7 @@ void MCELFStreamer::EmitBundleLock(bool AlignToEnd) {
void MCELFStreamer::EmitBundleUnlock() {
- MCSection &Sec = *getCurrentSectionData();
+ MCSection &Sec = *getCurrentSectionOnly();
// Sanity checks
if (!getAssembler().isBundlingEnabled())
@@ -606,7 +603,7 @@ void MCELFStreamer::EmitBundleUnlock() {
report_fatal_error("Empty bundle-locked group is forbidden");
// When the -mc-relax-all flag is used, we emit instructions to fragments
- // stored on a stack. When the bundle unlock is emited, we pop a fragment
+ // stored on a stack. When the bundle unlock is emited, we pop a fragment
// from the stack a merge it to the one below.
if (getAssembler().getRelaxAll()) {
assert(!BundleGroups.empty() && "There are no bundle groups");
@@ -641,7 +638,7 @@ void MCELFStreamer::Flush() {
new MCAlignFragment(ByteAlignment, 0, 1, ByteAlignment, &Section);
MCFragment *F = new MCFillFragment(0, 0, Size, &Section);
- Symbol.getData().setFragment(F);
+ Symbol.setFragment(F);
// Update the maximum alignment of the section if necessary.
if (ByteAlignment > Section.getAlignment())
@@ -653,7 +650,7 @@ void MCELFStreamer::Flush() {
void MCELFStreamer::FinishImpl() {
// Ensure the last section gets aligned if necessary.
- MCSection *CurSection = getCurrentSectionData();
+ MCSection *CurSection = getCurrentSectionOnly();
setSectionAlignmentForBundling(getAssembler(), CurSection);
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 7f048d7..b16245a 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -30,10 +30,10 @@ STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations");
-void MCExpr::print(raw_ostream &OS) const {
+void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
switch (getKind()) {
case MCExpr::Target:
- return cast<MCTargetExpr>(this)->PrintImpl(OS);
+ return cast<MCTargetExpr>(this)->printImpl(OS, MAI);
case MCExpr::Constant:
OS << cast<MCConstantExpr>(*this).getValue();
@@ -44,10 +44,12 @@ void MCExpr::print(raw_ostream &OS) const {
// Parenthesize names that start with $ so that they don't look like
// absolute names.
bool UseParens = Sym.getName()[0] == '$';
- if (UseParens)
- OS << '(' << Sym << ')';
- else
- OS << Sym;
+ if (UseParens) {
+ OS << '(';
+ Sym.print(OS, MAI);
+ OS << ')';
+ } else
+ Sym.print(OS, MAI);
if (SRE.getKind() != MCSymbolRefExpr::VK_None)
@@ -63,7 +65,7 @@ void MCExpr::print(raw_ostream &OS) const {
case MCUnaryExpr::Not: OS << '~'; break;
case MCUnaryExpr::Plus: OS << '+'; break;
- OS << *UE.getSubExpr();
+ UE.getSubExpr()->print(OS, MAI);
@@ -72,9 +74,11 @@ void MCExpr::print(raw_ostream &OS) const {
// Only print parens around the LHS if it is non-trivial.
if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS())) {
- OS << *BE.getLHS();
+ BE.getLHS()->print(OS, MAI);
} else {
- OS << '(' << *BE.getLHS() << ')';
+ OS << '(';
+ BE.getLHS()->print(OS, MAI);
+ OS << ')';
switch (BE.getOpcode()) {
@@ -111,9 +115,11 @@ void MCExpr::print(raw_ostream &OS) const {
// Only print parens around the LHS if it is non-trivial.
if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
- OS << *BE.getRHS();
+ BE.getRHS()->print(OS, MAI);
} else {
- OS << '(' << *BE.getRHS() << ')';
+ OS << '(';
+ BE.getRHS()->print(OS, MAI);
+ OS << ')';
@@ -131,17 +137,17 @@ void MCExpr::dump() const {
/* *** */
-const MCBinaryExpr *MCBinaryExpr::Create(Opcode Opc, const MCExpr *LHS,
+const MCBinaryExpr *MCBinaryExpr::create(Opcode Opc, const MCExpr *LHS,
const MCExpr *RHS, MCContext &Ctx) {
return new (Ctx) MCBinaryExpr(Opc, LHS, RHS);
-const MCUnaryExpr *MCUnaryExpr::Create(Opcode Opc, const MCExpr *Expr,
+const MCUnaryExpr *MCUnaryExpr::create(Opcode Opc, const MCExpr *Expr,
MCContext &Ctx) {
return new (Ctx) MCUnaryExpr(Opc, Expr);
-const MCConstantExpr *MCConstantExpr::Create(int64_t Value, MCContext &Ctx) {
+const MCConstantExpr *MCConstantExpr::create(int64_t Value, MCContext &Ctx) {
return new (Ctx) MCConstantExpr(Value);
@@ -156,15 +162,15 @@ MCSymbolRefExpr::MCSymbolRefExpr(const MCSymbol *Symbol, VariantKind Kind,
-const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym,
+const MCSymbolRefExpr *MCSymbolRefExpr::create(const MCSymbol *Sym,
VariantKind Kind,
MCContext &Ctx) {
return new (Ctx) MCSymbolRefExpr(Sym, Kind, Ctx.getAsmInfo());
-const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind,
+const MCSymbolRefExpr *MCSymbolRefExpr::create(StringRef Name, VariantKind Kind,
MCContext &Ctx) {
- return Create(Ctx.getOrCreateSymbol(Name), Kind, Ctx);
+ return create(Ctx.getOrCreateSymbol(Name), Kind, Ctx);
StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
@@ -400,23 +406,23 @@ void MCTargetExpr::anchor() {}
/* *** */
-bool MCExpr::EvaluateAsAbsolute(int64_t &Res) const {
- return EvaluateAsAbsolute(Res, nullptr, nullptr, nullptr);
+bool MCExpr::evaluateAsAbsolute(int64_t &Res) const {
+ return evaluateAsAbsolute(Res, nullptr, nullptr, nullptr);
-bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+bool MCExpr::evaluateAsAbsolute(int64_t &Res,
const MCAsmLayout &Layout) const {
- return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr);
+ return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr);
-bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+bool MCExpr::evaluateAsAbsolute(int64_t &Res,
const MCAsmLayout &Layout,
const SectionAddrMap &Addrs) const {
- return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs);
+ return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs);
-bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
- return EvaluateAsAbsolute(Res, &Asm, nullptr, nullptr);
+bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
+ return evaluateAsAbsolute(Res, &Asm, nullptr, nullptr);
bool MCExpr::evaluateKnownAbsolute(int64_t &Res,
@@ -425,7 +431,7 @@ bool MCExpr::evaluateKnownAbsolute(int64_t &Res,
-bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
+bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
const MCAsmLayout *Layout,
const SectionAddrMap *Addrs) const {
// FIXME: The use if InSet = Addrs is a hack. Setting InSet causes us
@@ -446,7 +452,7 @@ bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
bool IsRelocatable =
- EvaluateAsRelocatableImpl(Value, Asm, Layout, nullptr, Addrs, InSet);
+ evaluateAsRelocatableImpl(Value, Asm, Layout, nullptr, Addrs, InSet);
// Record the current value.
Res = Value.getConstant();
@@ -468,14 +474,11 @@ static void AttemptToFoldSymbolOffsetDifference(
if (SA.isUndefined() || SB.isUndefined())
- if (!Asm->getWriter().IsSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
+ if (!Asm->getWriter().isSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
- const MCSymbolData &AD = Asm->getSymbolData(SA);
- const MCSymbolData &BD = Asm->getSymbolData(SB);
- if (AD.getFragment() == BD.getFragment()) {
- Addend += (AD.getOffset() - BD.getOffset());
+ if (SA.getFragment() == SB.getFragment()) {
+ Addend += (SA.getOffset() - SB.getOffset());
// Pointers to Thumb symbols need to have their low-bit set to allow
// for interworking.
@@ -491,8 +494,8 @@ static void AttemptToFoldSymbolOffsetDifference(
if (!Layout)
- const MCSection &SecA = *AD.getFragment()->getParent();
- const MCSection &SecB = *BD.getFragment()->getParent();
+ const MCSection &SecA = *SA.getFragment()->getParent();
+ const MCSection &SecB = *SB.getFragment()->getParent();
if ((&SecA != &SecB) && !Addrs)
@@ -589,21 +592,28 @@ EvaluateSymbolicAdd(const MCAssembler *Asm, const MCAsmLayout *Layout,
return true;
-bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
+bool MCExpr::evaluateAsRelocatable(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const {
MCAssembler *Assembler = Layout ? &Layout->getAssembler() : nullptr;
- return EvaluateAsRelocatableImpl(Res, Assembler, Layout, Fixup, nullptr,
+ return evaluateAsRelocatableImpl(Res, Assembler, Layout, Fixup, nullptr,
bool MCExpr::evaluateAsValue(MCValue &Res, const MCAsmLayout &Layout) const {
MCAssembler *Assembler = &Layout.getAssembler();
- return EvaluateAsRelocatableImpl(Res, Assembler, &Layout, nullptr, nullptr,
+ return evaluateAsRelocatableImpl(Res, Assembler, &Layout, nullptr, nullptr,
static bool canExpand(const MCSymbol &Sym, const MCAssembler *Asm, bool InSet) {
+ const MCExpr *Expr = Sym.getVariableValue();
+ const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr);
+ if (Inner) {
+ if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF)
+ return false;
+ }
if (InSet)
return true;
if (!Asm)
@@ -611,7 +621,7 @@ static bool canExpand(const MCSymbol &Sym, const MCAssembler *Asm, bool InSet) {
return !Asm->getWriter().isWeak(Sym);
-bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
+bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCAsmLayout *Layout,
const MCFixup *Fixup,
const SectionAddrMap *Addrs,
@@ -620,7 +630,7 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
switch (getKind()) {
case Target:
- return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res, Layout,
+ return cast<MCTargetExpr>(this)->evaluateAsRelocatableImpl(Res, Layout,
case Constant:
@@ -635,7 +645,7 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None &&
canExpand(Sym, Asm, InSet)) {
bool IsMachO = SRE->hasSubsectionsViaSymbols();
- if (Sym.getVariableValue()->EvaluateAsRelocatableImpl(
+ if (Sym.getVariableValue()->evaluateAsRelocatableImpl(
Res, Asm, Layout, Fixup, Addrs, InSet || IsMachO)) {
if (!IsMachO)
return true;
@@ -661,7 +671,7 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
MCValue Value;
- if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout, Fixup,
+ if (!AUE->getSubExpr()->evaluateAsRelocatableImpl(Value, Asm, Layout, Fixup,
Addrs, InSet))
return false;
@@ -695,9 +705,9 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
MCValue LHSValue, RHSValue;
- if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout, Fixup,
+ if (!ABE->getLHS()->evaluateAsRelocatableImpl(LHSValue, Asm, Layout, Fixup,
Addrs, InSet) ||
- !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout, Fixup,
+ !ABE->getRHS()->evaluateAsRelocatableImpl(RHSValue, Asm, Layout, Fixup,
Addrs, InSet))
return false;
@@ -755,11 +765,11 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
llvm_unreachable("Invalid assembly expression kind!");
-MCSection *MCExpr::FindAssociatedSection() const {
+MCSection *MCExpr::findAssociatedSection() const {
switch (getKind()) {
case Target:
// We never look through target specific expressions.
- return cast<MCTargetExpr>(this)->FindAssociatedSection();
+ return cast<MCTargetExpr>(this)->findAssociatedSection();
case Constant:
return MCSymbol::AbsolutePseudoSection;
@@ -775,12 +785,12 @@ MCSection *MCExpr::FindAssociatedSection() const {
case Unary:
- return cast<MCUnaryExpr>(this)->getSubExpr()->FindAssociatedSection();
+ return cast<MCUnaryExpr>(this)->getSubExpr()->findAssociatedSection();
case Binary: {
const MCBinaryExpr *BE = cast<MCBinaryExpr>(this);
- MCSection *LHS_S = BE->getLHS()->FindAssociatedSection();
- MCSection *RHS_S = BE->getRHS()->FindAssociatedSection();
+ MCSection *LHS_S = BE->getLHS()->findAssociatedSection();
+ MCSection *RHS_S = BE->getRHS()->findAssociatedSection();
// If either section is absolute, return the other.
if (LHS_S == MCSymbol::AbsolutePseudoSection)
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 0dc3121..23afe80 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -16,6 +16,15 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+void llvm::dumpBytes(ArrayRef<uint8_t> bytes, raw_ostream &OS) {
+ static const char hex_rep[] = "0123456789abcdef";
+ for (char i: bytes) {
+ OS << hex_rep[(i & 0xF0) >> 4];
+ OS << hex_rep[i & 0xF];
+ OS << ' ';
+ }
MCInstPrinter::~MCInstPrinter() {
diff --git a/lib/MC/MCLinkerOptimizationHint.cpp b/lib/MC/MCLinkerOptimizationHint.cpp
index 2c9c67c..5f6a579 100644
--- a/lib/MC/MCLinkerOptimizationHint.cpp
+++ b/lib/MC/MCLinkerOptimizationHint.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
// - Its argN.
// <arg1> to <argN> are absolute addresses in the object file, i.e.,
// relative addresses from the beginning of the object file.
-void MCLOHDirective::Emit_impl(raw_ostream &OutStream,
+void MCLOHDirective::emit_impl(raw_ostream &OutStream,
const MachObjectWriter &ObjWriter,
const MCAsmLayout &Layout) const {
encodeULEB128(Kind, OutStream);
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 6297340..53cd131 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -18,12 +18,11 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
-#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolMachO.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -93,9 +92,6 @@ public:
void EndCOFFSymbolDef() override {
llvm_unreachable("macho doesn't support this directive");
- void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) override {
- llvm_unreachable("macho doesn't support this directive");
- }
void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
void EmitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
@@ -162,22 +158,22 @@ void MCMachOStreamer::ChangeSection(MCSection *Section,
// Output a linker-local symbol so we don't need section-relative local
// relocations. The linker hates us when we do that.
- if (LabelSections && !HasSectionLabel[Section]) {
+ if (LabelSections && !HasSectionLabel[Section] &&
+ !Section->getBeginSymbol()) {
MCSymbol *Label = getContext().createLinkerPrivateTempSymbol();
- EmitLabel(Label);
+ Section->setBeginSymbol(Label);
HasSectionLabel[Section] = true;
void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
MCSymbol *EHSymbol) {
- MCSymbolData &SD =
- getAssembler().getOrCreateSymbolData(*Symbol);
- if (SD.isExternal())
+ getAssembler().registerSymbol(*Symbol);
+ if (Symbol->isExternal())
EmitSymbolAttribute(EHSymbol, MCSA_Global);
- if (SD.getFlags() & SF_WeakDefinition)
+ if (cast<MCSymbolMachO>(Symbol)->isWeakDefinition())
EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition);
- if (SD.isPrivateExtern())
+ if (Symbol->isPrivateExtern())
EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern);
@@ -193,7 +189,6 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
- MCSymbolData &SD = getAssembler().getSymbolData(*Symbol);
// This causes the reference type flag to be cleared. Darwin 'as' was "trying"
// to clear the weak reference and weak definition bits too, but the
// implementation was buggy. For now we just try to match 'as', for
@@ -201,7 +196,7 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
// FIXME: Cleanup this code, these bits should be emitted based on semantic
// properties, not on the order of definition, etc.
- SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask);
+ cast<MCSymbolMachO>(Symbol)->clearReferenceType();
void MCMachOStreamer::EmitDataRegion(DataRegionData::KindTy Kind) {
@@ -276,10 +271,13 @@ void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
// Remember that the function is a thumb function. Fixup and relocation
// values will need adjusted.
+ cast<MCSymbolMachO>(Symbol)->setThumbFunc();
-bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Sym,
MCSymbolAttr Attribute) {
+ MCSymbolMachO *Symbol = cast<MCSymbolMachO>(Sym);
// Indirect symbols are handled differently, to match how 'as' handles
// them. This makes writing matching .o files easier.
if (Attribute == MCSA_IndirectSymbol) {
@@ -287,15 +285,15 @@ bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
// important for matching the string table that 'as' generates.
IndirectSymbolData ISD;
ISD.Symbol = Symbol;
- ISD.Section = getCurrentSectionData();
+ ISD.Section = getCurrentSectionOnly();
return true;
// Adding a symbol attribute always introduces the symbol, note that an
- // important side effect of calling getOrCreateSymbolData here is to register
+ // important side effect of calling registerSymbol here is to register
// the symbol with the assembler.
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ getAssembler().registerSymbol(*Symbol);
// The implementation of symbol attributes is designed to match 'as', but it
// leaves much to desired. It doesn't really make sense to arbitrarily add and
@@ -321,53 +319,54 @@ bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
return false;
case MCSA_Global:
- SD.setExternal(true);
+ Symbol->setExternal(true);
// This effectively clears the undefined lazy bit, in Darwin 'as', although
// it isn't very consistent because it implements this as part of symbol
// lookup.
// FIXME: Cleanup this code, these bits should be emitted based on semantic
// properties, not on the order of definition, etc.
- SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeUndefinedLazy);
+ Symbol->setReferenceTypeUndefinedLazy(false);
case MCSA_LazyReference:
// FIXME: This requires -dynamic.
- SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
+ Symbol->setNoDeadStrip();
if (Symbol->isUndefined())
- SD.setFlags(SD.getFlags() | SF_ReferenceTypeUndefinedLazy);
+ Symbol->setReferenceTypeUndefinedLazy(true);
// Since .reference sets the no dead strip bit, it is equivalent to
// .no_dead_strip in practice.
case MCSA_Reference:
case MCSA_NoDeadStrip:
- SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
+ Symbol->setNoDeadStrip();
case MCSA_SymbolResolver:
- SD.setFlags(SD.getFlags() | SF_SymbolResolver);
+ Symbol->setSymbolResolver();
case MCSA_PrivateExtern:
- SD.setExternal(true);
- SD.setPrivateExtern(true);
+ Symbol->setExternal(true);
+ Symbol->setPrivateExtern(true);
case MCSA_WeakReference:
// FIXME: This requires -dynamic.
if (Symbol->isUndefined())
- SD.setFlags(SD.getFlags() | SF_WeakReference);
+ Symbol->setWeakReference();
case MCSA_WeakDefinition:
// FIXME: 'as' enforces that this is defined and global. The manual claims
// it has to be in a coalesced section, but this isn't enforced.
- SD.setFlags(SD.getFlags() | SF_WeakDefinition);
+ Symbol->setWeakDefinition();
case MCSA_WeakDefAutoPrivate:
- SD.setFlags(SD.getFlags() | SF_WeakDefinition | SF_WeakReference);
+ Symbol->setWeakDefinition();
+ Symbol->setWeakReference();
@@ -376,10 +375,8 @@ bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
// Encode the 'desc' value into the lowest implementation defined bits.
- assert(DescValue == (DescValue & SF_DescFlagsMask) &&
- "Invalid .desc value!");
- getAssembler().getOrCreateSymbolData(*Symbol).setFlags(
- DescValue & SF_DescFlagsMask);
+ getAssembler().registerSymbol(*Symbol);
+ cast<MCSymbolMachO>(Symbol)->setDesc(DescValue);
void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -389,9 +386,9 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
AssignSection(Symbol, nullptr);
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- SD.setExternal(true);
- SD.setCommon(Size, ByteAlignment);
+ getAssembler().registerSymbol(*Symbol);
+ Symbol->setExternal(true);
+ Symbol->setCommon(Size, ByteAlignment);
void MCMachOStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -414,17 +411,17 @@ void MCMachOStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ getAssembler().registerSymbol(*Symbol);
// Emit an align fragment if necessary.
if (ByteAlignment != 1)
new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, Section);
- MCFragment *F = new MCFillFragment(0, 0, Size, Section);
- SD.setFragment(F);
AssignSection(Symbol, Section);
+ MCFragment *F = new MCFillFragment(0, 0, Size, Section);
+ Symbol->setFragment(F);
// Update the maximum alignment on the zero fill section if necessary.
if (ByteAlignment > Section->getAlignment())
@@ -466,11 +463,11 @@ void MCMachOStreamer::FinishImpl() {
// defining symbols.
DenseMap<const MCFragment *, const MCSymbol *> DefiningSymbolMap;
for (const MCSymbol &Symbol : getAssembler().symbols()) {
- MCSymbolData &SD = Symbol.getData();
- if (getAssembler().isSymbolLinkerVisible(Symbol) && SD.getFragment()) {
+ if (getAssembler().isSymbolLinkerVisible(Symbol) && Symbol.getFragment()) {
// An atom defining symbol should never be internal to a fragment.
- assert(SD.getOffset() == 0 && "Invalid offset in atom defining symbol!");
- DefiningSymbolMap[SD.getFragment()] = &Symbol;
+ assert(Symbol.getOffset() == 0 &&
+ "Invalid offset in atom defining symbol!");
+ DefiningSymbolMap[Symbol.getFragment()] = &Symbol;
diff --git a/lib/MC/MCMachObjectTargetWriter.cpp b/lib/MC/MCMachObjectTargetWriter.cpp
index 146cebf..4ffd6a7 100644
--- a/lib/MC/MCMachObjectTargetWriter.cpp
+++ b/lib/MC/MCMachObjectTargetWriter.cpp
@@ -11,12 +11,9 @@
using namespace llvm;
- bool Is64Bit_, uint32_t CPUType_, uint32_t CPUSubtype_,
- bool UseAggressiveSymbolFolding_)
- : Is64Bit(Is64Bit_), CPUType(CPUType_), CPUSubtype(CPUSubtype_),
- UseAggressiveSymbolFolding(UseAggressiveSymbolFolding_) {
+MCMachObjectTargetWriter::MCMachObjectTargetWriter(bool Is64Bit_,
+ uint32_t CPUType_,
+ uint32_t CPUSubtype_)
+ : Is64Bit(Is64Bit_), CPUType(CPUType_), CPUSubtype(CPUSubtype_) {}
-MCMachObjectTargetWriter::~MCMachObjectTargetWriter() {
+MCMachObjectTargetWriter::~MCMachObjectTargetWriter() {}
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index e99f036..83a08e2 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -39,7 +39,7 @@ static bool useCompactUnwind(const Triple &T) {
return false;
-void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
+void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
// MachO
SupportsWeakOmittedEHFrame = false;
@@ -241,7 +241,7 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
TLSExtraDataSection = TLSTLVSection;
-void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
+void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
switch (T.getArch()) {
case Triple::mips:
case Triple::mipsel:
@@ -324,10 +324,16 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
case Triple::mipsel:
case Triple::mips64:
case Triple::mips64el:
- // MIPS uses indirect pointer to refer personality functions, so that the
- // eh_frame section can be read-only. DW.ref.personality will be generated
- // for relocation.
+ // MIPS uses indirect pointer to refer personality functions and types, so
+ // that the eh_frame section can be read-only. DW.ref.personality will be
+ // generated for relocation.
PersonalityEncoding = dwarf::DW_EH_PE_indirect;
+ // FIXME: The N64 ABI probably ought to use DW_EH_PE_sdata8 but we can't
+ // identify N64 from just a triple.
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ // We don't support PC-relative LSDA references in GAS so we use the default
+ // DW_EH_PE_absptr for those.
case Triple::ppc64:
case Triple::ppc64le:
@@ -514,7 +520,7 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
-void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
+void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
bool IsWoA = T.getArch() == Triple::arm || T.getArch() == Triple::thumb;
CommDirectiveSupportsAlignment = true;
@@ -714,6 +720,9 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
+ SXDataSection = Ctx->getCOFFSection(".sxdata", COFF::IMAGE_SCN_LNK_INFO,
+ SectionKind::getMetadata());
TLSDataSection = Ctx->getCOFFSection(
@@ -756,15 +765,15 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef T, Reloc::Model relocm,
Arch == Triple::UnknownArch) &&
TT.isOSBinFormatMachO()) {
Env = IsMachO;
- InitMachOMCObjectFileInfo(TT);
+ initMachOMCObjectFileInfo(TT);
} else if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
Arch == Triple::arm || Arch == Triple::thumb) &&
(TT.isOSWindows() && TT.getObjectFormat() == Triple::COFF)) {
Env = IsCOFF;
- InitCOFFMCObjectFileInfo(TT);
+ initCOFFMCObjectFileInfo(TT);
} else {
Env = IsELF;
- InitELFMCObjectFileInfo(TT);
+ initELFMCObjectFileInfo(TT);
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 176f5e7..6de02bc 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -29,7 +29,7 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
: MCStreamer(Context),
Assembler(new MCAssembler(Context, TAB, *Emitter_,
*TAB.createObjectWriter(OS), OS)),
- CurSectionData(nullptr), EmitEHFrame(true), EmitDebugFrame(false) {}
+ EmitEHFrame(true), EmitDebugFrame(false) {}
MCObjectStreamer::~MCObjectStreamer() {
delete &Assembler->getBackend();
@@ -42,12 +42,13 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
if (PendingLabels.size()) {
if (!F) {
F = new MCDataFragment();
- CurSectionData->getFragmentList().insert(CurInsertionPoint, F);
- F->setParent(CurSectionData);
+ MCSection *CurSection = getCurrentSectionOnly();
+ CurSection->getFragmentList().insert(CurInsertionPoint, F);
+ F->setParent(CurSection);
- for (MCSymbolData *SD : PendingLabels) {
- SD->setFragment(F);
- SD->setOffset(FOffset);
+ for (MCSymbol *Sym : PendingLabels) {
+ Sym->setFragment(F);
+ Sym->setOffset(FOffset);
@@ -56,30 +57,23 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
bool MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi,
const MCSymbol *Lo,
unsigned Size) {
- // Must have symbol data.
- if (!Assembler->hasSymbolData(*Hi) || !Assembler->hasSymbolData(*Lo))
- return false;
- auto &HiD = Assembler->getSymbolData(*Hi);
- auto &LoD = Assembler->getSymbolData(*Lo);
// Must both be assigned to the same (valid) fragment.
- if (!HiD.getFragment() || HiD.getFragment() != LoD.getFragment())
+ if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment())
return false;
// Must be a data fragment.
- if (!isa<MCDataFragment>(HiD.getFragment()))
+ if (!isa<MCDataFragment>(Hi->getFragment()))
return false;
- assert(HiD.getOffset() >= LoD.getOffset() &&
+ assert(Hi->getOffset() >= Lo->getOffset() &&
"Expected Hi to be greater than Lo");
- EmitIntValue(HiD.getOffset() - LoD.getOffset(), Size);
+ EmitIntValue(Hi->getOffset() - Lo->getOffset(), Size);
return true;
void MCObjectStreamer::reset() {
if (Assembler)
- CurSectionData = nullptr;
CurInsertionPoint = MCSection::iterator();
EmitEHFrame = true;
EmitDebugFrame = false;
@@ -99,9 +93,9 @@ void MCObjectStreamer::EmitFrames(MCAsmBackend *MAB) {
MCFragment *MCObjectStreamer::getCurrentFragment() const {
- assert(getCurrentSectionData() && "No current section!");
+ assert(getCurrentSectionOnly() && "No current section!");
- if (CurInsertionPoint != getCurrentSectionData()->getFragmentList().begin())
+ if (CurInsertionPoint != getCurrentSectionOnly()->getFragmentList().begin())
return std::prev(CurInsertionPoint);
return nullptr;
@@ -120,7 +114,7 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() {
void MCObjectStreamer::visitUsedSymbol(const MCSymbol &Sym) {
- Assembler->getOrCreateSymbolData(Sym);
+ Assembler->registerSymbol(Sym);
void MCObjectStreamer::EmitCFISections(bool EH, bool Debug) {
@@ -138,7 +132,7 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
// Avoid fixups when possible.
int64_t AbsValue;
- if (Value->EvaluateAsAbsolute(AbsValue, getAssembler())) {
+ if (Value->evaluateAsAbsolute(AbsValue, getAssembler())) {
EmitIntValue(AbsValue, Size);
@@ -162,8 +156,8 @@ void MCObjectStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+ getAssembler().registerSymbol(*Symbol);
+ assert(!Symbol->getFragment() && "Unexpected fragment on symbol data!");
// If there is a current fragment, mark the symbol as pointing into it.
// Otherwise queue the label and set its fragment pointer when we emit the
@@ -171,16 +165,16 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
if (F && !(getAssembler().isBundlingEnabled() &&
getAssembler().getRelaxAll())) {
- SD.setFragment(F);
- SD.setOffset(F->getContents().size());
+ Symbol->setFragment(F);
+ Symbol->setOffset(F->getContents().size());
} else {
- PendingLabels.push_back(&SD);
+ PendingLabels.push_back(Symbol);
void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
int64_t IntValue;
- if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
+ if (Value->evaluateAsAbsolute(IntValue, getAssembler())) {
@@ -189,7 +183,7 @@ void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) {
int64_t IntValue;
- if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
+ if (Value->evaluateAsAbsolute(IntValue, getAssembler())) {
@@ -212,21 +206,20 @@ bool MCObjectStreamer::changeSectionImpl(MCSection *Section,
bool Created = getAssembler().registerSection(*Section);
- CurSectionData = Section;
int64_t IntSubsection = 0;
if (Subsection &&
- !Subsection->EvaluateAsAbsolute(IntSubsection, getAssembler()))
+ !Subsection->evaluateAsAbsolute(IntSubsection, getAssembler()))
report_fatal_error("Cannot evaluate subsection number");
if (IntSubsection < 0 || IntSubsection > 8192)
report_fatal_error("Subsection number out of range");
CurInsertionPoint =
- CurSectionData->getSubsectionInsertionPoint(unsigned(IntSubsection));
+ Section->getSubsectionInsertionPoint(unsigned(IntSubsection));
return Created;
void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
- getAssembler().getOrCreateSymbolData(*Symbol);
+ getAssembler().registerSymbol(*Symbol);
MCStreamer::EmitAssignment(Symbol, Value);
@@ -238,7 +231,7 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
const MCSubtargetInfo &STI) {
MCStreamer::EmitInstruction(Inst, STI);
- MCSection *Sec = getCurrentSectionData();
+ MCSection *Sec = getCurrentSectionOnly();
// Now that a machine instruction has been assembled into this section, make
@@ -323,10 +316,10 @@ static const MCExpr *buildSymbolDiff(MCObjectStreamer &OS, const MCSymbol *A,
const MCSymbol *B) {
MCContext &Context = OS.getContext();
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- const MCExpr *ARef = MCSymbolRefExpr::Create(A, Variant, Context);
- const MCExpr *BRef = MCSymbolRefExpr::Create(B, Variant, Context);
+ const MCExpr *ARef = MCSymbolRefExpr::create(A, Variant, Context);
+ const MCExpr *BRef = MCSymbolRefExpr::create(B, Variant, Context);
const MCExpr *AddrDelta =
- MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context);
+ MCBinaryExpr::create(MCBinaryExpr::Sub, ARef, BRef, Context);
return AddrDelta;
@@ -352,7 +345,7 @@ void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel);
int64_t Res;
- if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
+ if (AddrDelta->evaluateAsAbsolute(Res, getAssembler())) {
MCDwarfLineAddr::Emit(this, LineDelta, Res);
@@ -363,7 +356,7 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
const MCSymbol *Label) {
const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel);
int64_t Res;
- if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
+ if (AddrDelta->evaluateAsAbsolute(Res, getAssembler())) {
MCDwarfFrameEmitter::EmitAdvanceLoc(*this, Res);
@@ -398,7 +391,7 @@ void MCObjectStreamer::EmitCodeAlignment(unsigned ByteAlignment,
bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
int64_t Res;
- if (Offset->EvaluateAsAbsolute(Res, getAssembler())) {
+ if (Offset->evaluateAsAbsolute(Res, getAssembler())) {
insert(new MCOrgFragment(*Offset, Value));
return false;
@@ -407,11 +400,11 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
const MCExpr *Ref =
- MCSymbolRefExpr::Create(CurrentPos, Variant, getContext());
+ MCSymbolRefExpr::create(CurrentPos, Variant, getContext());
const MCExpr *Delta =
- MCBinaryExpr::Create(MCBinaryExpr::Sub, Offset, Ref, getContext());
+ MCBinaryExpr::create(MCBinaryExpr::Sub, Offset, Ref, getContext());
- if (!Delta->EvaluateAsAbsolute(Res, getAssembler()))
+ if (!Delta->evaluateAsAbsolute(Res, getAssembler()))
return true;
EmitFill(Res, Value);
return false;
diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp
index 5cc629b..3479034 100644
--- a/lib/MC/MCObjectWriter.cpp
+++ b/lib/MC/MCObjectWriter.cpp
@@ -17,7 +17,7 @@ using namespace llvm;
MCObjectWriter::~MCObjectWriter() {
-bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved(
+bool MCObjectWriter::isSymbolRefDifferenceFullyResolved(
const MCAssembler &Asm, const MCSymbolRefExpr *A, const MCSymbolRefExpr *B,
bool InSet) const {
// Modified symbol references cannot be resolved.
@@ -30,16 +30,14 @@ bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved(
if (SA.isUndefined() || SB.isUndefined())
return false;
- const MCSymbolData &DataA = Asm.getSymbolData(SA);
- const MCSymbolData &DataB = Asm.getSymbolData(SB);
- if(!DataA.getFragment() || !DataB.getFragment())
+ if (!SA.getFragment() || !SB.getFragment())
return false;
- return IsSymbolRefDifferenceFullyResolvedImpl(Asm, SA, *DataB.getFragment(),
+ return isSymbolRefDifferenceFullyResolvedImpl(Asm, SA, *SB.getFragment(),
InSet, false);
-bool MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+bool MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
bool InSet, bool IsPCRel) const {
const MCSection &SecA = SymA.getSection();
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 1e805fd..20366dc 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -630,13 +630,15 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// If we are generating dwarf for assembly source files save the initial text
// section and generate a .file directive.
if (getContext().getGenDwarfForAssembly()) {
- MCSymbol *SectionStartSym = getContext().createTempSymbol();
- getStreamer().EmitLabel(SectionStartSym);
MCSection *Sec = getStreamer().getCurrentSection().first;
+ if (!Sec->getBeginSymbol()) {
+ MCSymbol *SectionStartSym = getContext().createTempSymbol();
+ getStreamer().EmitLabel(SectionStartSym);
+ Sec->setBeginSymbol(SectionStartSym);
+ }
bool InsertResult = getContext().addGenDwarfSection(Sec);
assert(InsertResult && ".text section should not have debug info yet");
- Sec->setBeginSymbol(SectionStartSym);
0, StringRef(), getContext().getMainFileName()));
@@ -787,7 +789,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
Lex(); // Eat the operator.
if (parsePrimaryExpr(Res, EndLoc))
return true;
- Res = MCUnaryExpr::CreateLNot(Res, getContext());
+ Res = MCUnaryExpr::createLNot(Res, getContext());
return false;
case AsmToken::Dollar:
case AsmToken::At:
@@ -801,7 +803,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
// temporary label to the streamer and refer to it.
MCSymbol *Sym = Ctx.createTempSymbol();
- Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+ Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
EndLoc = FirstTokenLoc;
return false;
@@ -869,7 +871,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
// Otherwise create a symbol ref.
- Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
+ Res = MCSymbolRefExpr::create(Sym, Variant, getContext());
return false;
case AsmToken::BigNum:
@@ -877,7 +879,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
case AsmToken::Integer: {
SMLoc Loc = getTok().getLoc();
int64_t IntVal = getTok().getIntVal();
- Res = MCConstantExpr::Create(IntVal, getContext());
+ Res = MCConstantExpr::create(IntVal, getContext());
EndLoc = Lexer.getTok().getEndLoc();
Lex(); // Eat token.
// Look for 'b' or 'f' following an Integer as a directional label
@@ -895,7 +897,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
if (IDVal == "f" || IDVal == "b") {
MCSymbol *Sym =
Ctx.getDirectionalLocalSymbol(IntVal, IDVal == "b");
- Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
+ Res = MCSymbolRefExpr::create(Sym, Variant, getContext());
if (IDVal == "b" && Sym->isUndefined())
return Error(Loc, "invalid reference to undefined symbol");
EndLoc = Lexer.getTok().getEndLoc();
@@ -907,7 +909,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
case AsmToken::Real: {
APFloat RealVal(APFloat::IEEEdouble, getTok().getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
- Res = MCConstantExpr::Create(IntVal, getContext());
+ Res = MCConstantExpr::create(IntVal, getContext());
EndLoc = Lexer.getTok().getEndLoc();
Lex(); // Eat token.
return false;
@@ -917,7 +919,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
// temporary label to the streamer and refer to it.
MCSymbol *Sym = Ctx.createTempSymbol();
- Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
EndLoc = Lexer.getTok().getEndLoc();
Lex(); // Eat identifier.
return false;
@@ -934,19 +936,19 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
Lex(); // Eat the operator.
if (parsePrimaryExpr(Res, EndLoc))
return true;
- Res = MCUnaryExpr::CreateMinus(Res, getContext());
+ Res = MCUnaryExpr::createMinus(Res, getContext());
return false;
case AsmToken::Plus:
Lex(); // Eat the operator.
if (parsePrimaryExpr(Res, EndLoc))
return true;
- Res = MCUnaryExpr::CreatePlus(Res, getContext());
+ Res = MCUnaryExpr::createPlus(Res, getContext());
return false;
case AsmToken::Tilde:
Lex(); // Eat the operator.
if (parsePrimaryExpr(Res, EndLoc))
return true;
- Res = MCUnaryExpr::CreateNot(Res, getContext());
+ Res = MCUnaryExpr::createNot(Res, getContext());
return false;
@@ -979,7 +981,7 @@ AsmParser::applyModifierToExpr(const MCExpr *E,
return E;
- return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
+ return MCSymbolRefExpr::create(&SRE->getSymbol(), Variant, getContext());
case MCExpr::Unary: {
@@ -987,7 +989,7 @@ AsmParser::applyModifierToExpr(const MCExpr *E,
const MCExpr *Sub = applyModifierToExpr(UE->getSubExpr(), Variant);
if (!Sub)
return nullptr;
- return MCUnaryExpr::Create(UE->getOpcode(), Sub, getContext());
+ return MCUnaryExpr::create(UE->getOpcode(), Sub, getContext());
case MCExpr::Binary: {
@@ -1003,7 +1005,7 @@ AsmParser::applyModifierToExpr(const MCExpr *E,
if (!RHS)
RHS = BE->getRHS();
- return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
+ return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, getContext());
@@ -1052,8 +1054,8 @@ bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
// Try to constant fold it up front, if possible.
int64_t Value;
- if (Res->EvaluateAsAbsolute(Value))
- Res = MCConstantExpr::Create(Value, getContext());
+ if (Res->evaluateAsAbsolute(Value))
+ Res = MCConstantExpr::create(Value, getContext());
return false;
@@ -1070,7 +1072,7 @@ bool AsmParser::parseAbsoluteExpression(int64_t &Res) {
if (parseExpression(Expr))
return true;
- if (!Expr->EvaluateAsAbsolute(Res))
+ if (!Expr->evaluateAsAbsolute(Res))
return Error(StartLoc, "expected absolute expression");
return false;
@@ -1181,7 +1183,7 @@ bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
return true;
// Merge LHS and RHS according to operator.
- Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext());
+ Res = MCBinaryExpr::create(Kind, Res, RHS, getContext());
@@ -1947,7 +1949,7 @@ bool AsmParser::parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg) {
if (Vararg) {
if (Lexer.isNot(AsmToken::EndOfStatement)) {
StringRef Str = parseStringToEndOfStatement();
- MA.push_back(AsmToken(AsmToken::String, Str));
+ MA.emplace_back(AsmToken::String, Str);
return false;
@@ -4344,8 +4346,7 @@ MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
// We Are Anonymous.
- MacroLikeBodies.push_back(
- MCAsmMacro(StringRef(), Body, MCAsmMacroParameters()));
+ MacroLikeBodies.emplace_back(StringRef(), Body, MCAsmMacroParameters());
return &MacroLikeBodies.back();
@@ -4377,7 +4378,7 @@ bool AsmParser::parseDirectiveRept(SMLoc DirectiveLoc, StringRef Dir) {
return true;
int64_t Count;
- if (!CountExpr->EvaluateAsAbsolute(Count)) {
+ if (!CountExpr->evaluateAsAbsolute(Count)) {
return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
@@ -4488,7 +4489,7 @@ bool AsmParser::parseDirectiveIrpc(SMLoc DirectiveLoc) {
StringRef Values = A.front().front().getString();
for (std::size_t I = 0, End = Values.size(); I != End; ++I) {
MCAsmMacroArgument Arg;
- Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I + 1)));
+ Arg.emplace_back(AsmToken::Identifier, Values.slice(I, I + 1));
// Note that the AtPseudoVariable is enabled for instantiations of .irpc.
// This is undocumented, but GAS seems to support it.
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index 82f7f22..f09bce0 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -57,6 +57,7 @@ class COFFAsmParser : public MCAsmParserExtension {
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveSafeSEH>(".safeseh");
// Win64 EH directives.
@@ -118,6 +119,7 @@ class COFFAsmParser : public MCAsmParserExtension {
bool ParseDirectiveEndef(StringRef, SMLoc);
bool ParseDirectiveSecRel32(StringRef, SMLoc);
bool ParseDirectiveSecIdx(StringRef, SMLoc);
+ bool ParseDirectiveSafeSEH(StringRef, SMLoc);
bool parseCOMDATType(COFF::COMDATType &Type);
bool ParseDirectiveLinkOnce(StringRef, SMLoc);
@@ -359,7 +361,7 @@ bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
StringRef COMDATSymName;
if (getLexer().is(AsmToken::Comma)) {
@@ -453,6 +455,21 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
return false;
+bool COFFAsmParser::ParseDirectiveSafeSEH(StringRef, SMLoc) {
+ StringRef SymbolID;
+ if (getParser().parseIdentifier(SymbolID))
+ return TokError("expected identifier in directive");
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+ MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
+ Lex();
+ getStreamer().EmitCOFFSafeSEH(Symbol);
+ return false;
bool COFFAsmParser::ParseDirectiveSecIdx(StringRef, SMLoc) {
StringRef SymbolID;
if (getParser().parseIdentifier(SymbolID))
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 87b15ff..e3585bd 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -16,7 +16,7 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/ELF.h"
using namespace llvm;
@@ -209,7 +209,7 @@ bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
StringRef Name;
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ MCSymbolELF *Sym = cast<MCSymbolELF>(getContext().getOrCreateSymbol(Name));
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
@@ -222,7 +222,7 @@ bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- getStreamer().EmitELFSize(Sym, Expr);
+ getStreamer().emitELFSize(Sym, Expr);
return false;
@@ -537,9 +537,11 @@ EndStmt:
if (getContext().getDwarfVersion() <= 2)
Warning(loc, "DWARF2 only supports one section per compilation unit");
- MCSymbol *SectionStartSymbol = getContext().createTempSymbol();
- getStreamer().EmitLabel(SectionStartSymbol);
- ELFSection->setBeginSymbol(SectionStartSymbol);
+ if (!ELFSection->getBeginSymbol()) {
+ MCSymbol *SectionStartSymbol = getContext().createTempSymbol();
+ getStreamer().EmitLabel(SectionStartSymbol);
+ ELFSection->setBeginSymbol(SectionStartSymbol);
+ }
@@ -661,7 +663,7 @@ bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName);
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
- const MCExpr *Value = MCSymbolRefExpr::Create(Sym, getContext());
+ const MCExpr *Value = MCSymbolRefExpr::create(Sym, getContext());
getStreamer().EmitAssignment(Alias, Value);
return false;
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index 04f932b..9152f2b 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -20,7 +20,8 @@ using namespace llvm;
MCSection::MCSection(SectionVariant V, SectionKind K, MCSymbol *Begin)
- : Begin(Begin), HasInstructions(false), Variant(V), Kind(K) {}
+ : Begin(Begin), BundleGroupBeforeFirstInst(false), HasInstructions(false),
+ IsRegistered(false), Variant(V), Kind(K) {}
MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) {
if (!End)
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
index 4d6298c..ce0b4f5 100644
--- a/lib/MC/MCSectionCOFF.cpp
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -94,7 +94,7 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
- OS << *COMDATSymbol;
+ COMDATSymbol->print(OS, &MAI);
OS << '\n';
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index 3cd8453..b4448d7 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -64,8 +64,10 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
if (ShouldOmitSectionDirective(SectionName, MAI)) {
OS << '\t' << getSectionName();
- if (Subsection)
- OS << '\t' << *Subsection;
+ if (Subsection) {
+ OS << '\t';
+ Subsection->print(OS, &MAI);
+ }
OS << '\n';
@@ -153,8 +155,11 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << '\n';
- if (Subsection)
- OS << "\t.subsection\t" << *Subsection << '\n';
+ if (Subsection) {
+ OS << "\t.subsection\t";
+ Subsection->print(OS, &MAI);
+ OS << '\n';
+ }
bool MCSectionELF::UseCodeAlign() const {
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 9e0cc6b..011969a 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -117,7 +117,7 @@ void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
"SectionRelative value requires 4-bytes");
if (!IsSectionRelative)
- EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size);
+ EmitValueImpl(MCSymbolRefExpr::create(Sym, getContext()), Size);
@@ -133,7 +133,7 @@ void MCStreamer::EmitGPRel32Value(const MCExpr *Value) {
/// EmitFill - Emit NumBytes bytes worth of the value specified by
/// FillValue. This implements directives such as '.space'.
void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) {
- const MCExpr *E = MCConstantExpr::Create(FillValue, getContext());
+ const MCExpr *E = MCConstantExpr::create(FillValue, getContext());
for (uint64_t i = 0, e = NumBytes; i != e; ++i)
EmitValue(E, 1);
@@ -391,11 +391,17 @@ void MCStreamer::EmitCFIWindowSave() {
void MCStreamer::EnsureValidWinFrameInfo() {
+ const MCAsmInfo *MAI = Context.getAsmInfo();
+ if (!MAI->usesWindowsCFI())
+ report_fatal_error(".seh_* directives are not supported on this target");
if (!CurrentWinFrameInfo || CurrentWinFrameInfo->End)
report_fatal_error("No open Win64 EH frame function!");
void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol) {
+ const MCAsmInfo *MAI = Context.getAsmInfo();
+ if (!MAI->usesWindowsCFI())
+ report_fatal_error(".seh_* directives are not supported on this target");
if (CurrentWinFrameInfo && !CurrentWinFrameInfo->End)
report_fatal_error("Starting a function before ending the previous one!");
@@ -549,6 +555,9 @@ void MCStreamer::EmitWinCFIEndProlog() {
CurrentWinFrameInfo->PrologEnd = Label;
+void MCStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
@@ -637,7 +646,7 @@ void MCStreamer::EndCOFFSymbolDef() {}
void MCStreamer::EmitFileDirective(StringRef Filename) {}
void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {}
void MCStreamer::EmitCOFFSymbolType(int Type) {}
-void MCStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+void MCStreamer::emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) {}
void MCStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {}
void MCStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
@@ -669,9 +678,9 @@ void MCStreamer::SwitchSection(MCSection *Section, const MCExpr *Subsection) {
MCSectionSubPair curSection = SectionStack.back().first;
SectionStack.back().second = curSection;
if (MCSectionSubPair(Section, Subsection) != curSection) {
+ ChangeSection(Section, Subsection);
SectionStack.back().first = MCSectionSubPair(Section, Subsection);
assert(!Section->hasEnded() && "Section already ended");
- ChangeSection(Section, Subsection);
MCSymbol *Sym = Section->getBeginSymbol();
if (Sym && !Sym->isInSection())
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index 6abdd3a..7954a02 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -81,6 +81,11 @@ FeatureBitset MCSubtargetInfo::ToggleFeature(StringRef FS) {
return FeatureBits;
+FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) {
+ SubtargetFeatures Features;
+ FeatureBits = Features.ApplyFeatureFlag(FeatureBits, FS, ProcFeatures);
+ return FeatureBits;
MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index ddc3814..8d07b76 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -8,60 +8,38 @@
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// Sentinel value for the absolute pseudo section.
MCSection *MCSymbol::AbsolutePseudoSection = reinterpret_cast<MCSection *>(1);
-static bool isAcceptableChar(char C) {
- if ((C < 'a' || C > 'z') &&
- (C < 'A' || C > 'Z') &&
- (C < '0' || C > '9') &&
- C != '_' && C != '$' && C != '.' && C != '@')
- return false;
- return true;
-/// NameNeedsQuoting - Return true if the identifier \p Str needs quotes to be
-/// syntactically correct.
-static bool NameNeedsQuoting(StringRef Str) {
- assert(!Str.empty() && "Cannot create an empty MCSymbol");
- // If any of the characters in the string is an unacceptable character, force
- // quotes.
- for (unsigned i = 0, e = Str.size(); i != e; ++i)
- if (!isAcceptableChar(Str[i]))
- return true;
- return false;
void MCSymbol::setVariableValue(const MCExpr *Value) {
assert(!IsUsed && "Cannot set a variable that has already been used.");
assert(Value && "Invalid variable value!");
this->Value = Value;
- this->Section = nullptr;
+ SectionOrFragment = nullptr;
-void MCSymbol::print(raw_ostream &OS) const {
+void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
// The name for this MCSymbol is required to be a valid target name. However,
// some targets support quoting names with funny characters. If the name
// contains a funny character, then print it quoted.
StringRef Name = getName();
- if (Name.empty()) {
- OS << "\"\"";
- return;
- }
- if (!NameNeedsQuoting(Name)) {
+ if (!MAI || MAI->isValidUnquotedName(Name)) {
OS << Name;
+ if (MAI && !MAI->supportsNameQuoting())
+ report_fatal_error("Symbol name with unsupported characters");
OS << '"';
- for (unsigned I = 0, E = Name.size(); I != E; ++I) {
- char C = Name[I];
+ for (char C : Name) {
if (C == '\n')
OS << "\\n";
else if (C == '"')
diff --git a/lib/MC/MCSymbolELF.cpp b/lib/MC/MCSymbolELF.cpp
new file mode 100644
index 0000000..c362065
--- /dev/null
+++ b/lib/MC/MCSymbolELF.cpp
@@ -0,0 +1,213 @@
+//===- lib/MC/MCSymbolELF.cpp ---------------------------------------------===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/Support/ELF.h"
+namespace llvm {
+namespace {
+enum {
+ // Shift value for STT_* flags. 7 possible values. 3 bits.
+ ELF_STT_Shift = 0,
+ // Shift value for STB_* flags. 4 possible values, 2 bits.
+ ELF_STB_Shift = 3,
+ // Shift value for STV_* flags. 4 possible values, 2 bits.
+ ELF_STV_Shift = 5,
+ // Shift value for STO_* flags. 3 bits. All the values are between 0x20 and
+ // 0xe0, so we shift right by 5 before storing.
+ ELF_STO_Shift = 7,
+ // One bit.
+ ELF_IsSignature_Shift = 10,
+ // One bit.
+ ELF_WeakrefUsedInReloc_Shift = 11,
+ // One bit.
+ ELF_UsedInReloc_Shift = 12,
+ // One bit.
+ ELF_BindingSet_Shift = 13
+void MCSymbolELF::setBinding(unsigned Binding) const {
+ setIsBindingSet();
+ unsigned Val;
+ switch (Binding) {
+ default:
+ llvm_unreachable("Unsupported Binding");
+ case ELF::STB_LOCAL:
+ Val = 0;
+ break;
+ Val = 1;
+ break;
+ case ELF::STB_WEAK:
+ Val = 2;
+ break;
+ Val = 3;
+ break;
+ }
+ uint32_t OtherFlags = getFlags() & ~(0x3 << ELF_STB_Shift);
+ setFlags(OtherFlags | (Val << ELF_STB_Shift));
+unsigned MCSymbolELF::getBinding() const {
+ if (isBindingSet()) {
+ uint32_t Val = (getFlags() & (0x3 << ELF_STB_Shift)) >> ELF_STB_Shift;
+ switch (Val) {
+ default:
+ llvm_unreachable("Invalid value");
+ case 0:
+ return ELF::STB_LOCAL;
+ case 1:
+ return ELF::STB_GLOBAL;
+ case 2:
+ return ELF::STB_WEAK;
+ case 3:
+ }
+ }
+ if (isDefined())
+ return ELF::STB_LOCAL;
+ if (isUsedInReloc())
+ return ELF::STB_GLOBAL;
+ if (isWeakrefUsedInReloc())
+ return ELF::STB_WEAK;
+ if (isSignature())
+ return ELF::STB_LOCAL;
+ return ELF::STB_GLOBAL;
+void MCSymbolELF::setType(unsigned Type) const {
+ unsigned Val;
+ switch (Type) {
+ default:
+ llvm_unreachable("Unsupported Binding");
+ Val = 0;
+ break;
+ Val = 1;
+ break;
+ case ELF::STT_FUNC:
+ Val = 2;
+ break;
+ Val = 3;
+ break;
+ Val = 4;
+ break;
+ case ELF::STT_TLS:
+ Val = 5;
+ break;
+ Val = 6;
+ break;
+ }
+ uint32_t OtherFlags = getFlags() & ~(0x7 << ELF_STT_Shift);
+ setFlags(OtherFlags | (Val << ELF_STT_Shift));
+unsigned MCSymbolELF::getType() const {
+ uint32_t Val = (getFlags() & (0x7 << ELF_STT_Shift)) >> ELF_STT_Shift;
+ switch (Val) {
+ default:
+ llvm_unreachable("Invalid value");
+ case 0:
+ return ELF::STT_NOTYPE;
+ case 1:
+ return ELF::STT_OBJECT;
+ case 2:
+ return ELF::STT_FUNC;
+ case 3:
+ return ELF::STT_SECTION;
+ case 4:
+ return ELF::STT_COMMON;
+ case 5:
+ return ELF::STT_TLS;
+ case 6:
+ return ELF::STT_GNU_IFUNC;
+ }
+void MCSymbolELF::setVisibility(unsigned Visibility) {
+ assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+ Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+ uint32_t OtherFlags = getFlags() & ~(0x3 << ELF_STV_Shift);
+ setFlags(OtherFlags | (Visibility << ELF_STV_Shift));
+unsigned MCSymbolELF::getVisibility() const {
+ unsigned Visibility = (getFlags() & (0x3 << ELF_STV_Shift)) >> ELF_STV_Shift;
+ assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+ Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+ return Visibility;
+void MCSymbolELF::setOther(unsigned Other) {
+ assert((Other & 0x1f) == 0);
+ Other >>= 5;
+ assert(Other <= 0x7);
+ uint32_t OtherFlags = getFlags() & ~(0x7 << ELF_STO_Shift);
+ setFlags(OtherFlags | (Other << ELF_STO_Shift));
+unsigned MCSymbolELF::getOther() const {
+ unsigned Other = (getFlags() & (0x7 << ELF_STO_Shift)) >> ELF_STO_Shift;
+ return Other << 5;
+void MCSymbolELF::setUsedInReloc() const {
+ uint32_t OtherFlags = getFlags() & ~(0x1 << ELF_UsedInReloc_Shift);
+ setFlags(OtherFlags | (1 << ELF_UsedInReloc_Shift));
+bool MCSymbolELF::isUsedInReloc() const {
+ return getFlags() & (0x1 << ELF_UsedInReloc_Shift);
+void MCSymbolELF::setIsWeakrefUsedInReloc() const {
+ uint32_t OtherFlags = getFlags() & ~(0x1 << ELF_WeakrefUsedInReloc_Shift);
+ setFlags(OtherFlags | (1 << ELF_WeakrefUsedInReloc_Shift));
+bool MCSymbolELF::isWeakrefUsedInReloc() const {
+ return getFlags() & (0x1 << ELF_WeakrefUsedInReloc_Shift);
+void MCSymbolELF::setIsSignature() const {
+ uint32_t OtherFlags = getFlags() & ~(0x1 << ELF_IsSignature_Shift);
+ setFlags(OtherFlags | (1 << ELF_IsSignature_Shift));
+bool MCSymbolELF::isSignature() const {
+ return getFlags() & (0x1 << ELF_IsSignature_Shift);
+void MCSymbolELF::setIsBindingSet() const {
+ uint32_t OtherFlags = getFlags() & ~(0x1 << ELF_BindingSet_Shift);
+ setFlags(OtherFlags | (1 << ELF_BindingSet_Shift));
+bool MCSymbolELF::isBindingSet() const {
+ return getFlags() & (0x1 << ELF_BindingSet_Shift);
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index f87ea67..1b73b7a 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -51,8 +51,8 @@ static void EmitAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS,
const MCSymbol *RHS) {
MCContext &Context = Streamer.getContext();
const MCExpr *Diff =
- MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LHS, Context),
- MCSymbolRefExpr::Create(RHS, Context), Context);
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(LHS, Context),
+ MCSymbolRefExpr::create(RHS, Context), Context);
Streamer.EmitValue(Diff, 1);
@@ -126,13 +126,13 @@ static void EmitSymbolRefWithOfs(MCStreamer &streamer,
const MCSymbol *Base,
const MCSymbol *Other) {
MCContext &Context = streamer.getContext();
- const MCSymbolRefExpr *BaseRef = MCSymbolRefExpr::Create(Base, Context);
- const MCSymbolRefExpr *OtherRef = MCSymbolRefExpr::Create(Other, Context);
- const MCExpr *Ofs = MCBinaryExpr::CreateSub(OtherRef, BaseRef, Context);
- const MCSymbolRefExpr *BaseRefRel = MCSymbolRefExpr::Create(Base,
+ const MCSymbolRefExpr *BaseRef = MCSymbolRefExpr::create(Base, Context);
+ const MCSymbolRefExpr *OtherRef = MCSymbolRefExpr::create(Other, Context);
+ const MCExpr *Ofs = MCBinaryExpr::createSub(OtherRef, BaseRef, Context);
+ const MCSymbolRefExpr *BaseRefRel = MCSymbolRefExpr::create(Base,
- streamer.EmitValue(MCBinaryExpr::CreateAdd(BaseRefRel, Ofs, Context), 4);
+ streamer.EmitValue(MCBinaryExpr::createAdd(BaseRefRel, Ofs, Context), 4);
static void EmitRuntimeFunction(MCStreamer &streamer,
@@ -142,7 +142,7 @@ static void EmitRuntimeFunction(MCStreamer &streamer,
EmitSymbolRefWithOfs(streamer, info->Function, info->Begin);
EmitSymbolRefWithOfs(streamer, info->Function, info->End);
- streamer.EmitValue(MCSymbolRefExpr::Create(info->Symbol,
+ streamer.EmitValue(MCSymbolRefExpr::create(info->Symbol,
context), 4);
@@ -207,7 +207,7 @@ static void EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
EmitRuntimeFunction(streamer, info->ChainedParent);
else if (flags &
((Win64EH::UNW_TerminateHandler|Win64EH::UNW_ExceptionHandler) << 3))
- streamer.EmitValue(MCSymbolRefExpr::Create(info->ExceptionHandler,
+ streamer.EmitValue(MCSymbolRefExpr::create(info->ExceptionHandler,
context), 4);
else if (numCodes == 0) {
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index ce34ba0..8ce6127 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -15,10 +15,9 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
-#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolMachO.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -46,7 +45,7 @@ bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
// References to weak definitions require external relocation entries; the
// definition may not always be the one in the same object file.
- if (S.getData().getFlags() & SF_WeakDefinition)
+ if (cast<MCSymbolMachO>(S).isWeakDefinition())
return true;
// Otherwise, we can use an internal relocation.
@@ -81,7 +80,7 @@ uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
MCValue Target;
- if (!S.getVariableValue()->EvaluateAsRelocatable(Target, &Layout, nullptr))
+ if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr))
report_fatal_error("unable to evaluate offset for variable '" +
S.getName() + "'");
@@ -101,7 +100,7 @@ uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
return Address;
- return getSectionAddress(S.getData().getFragment()->getParent()) +
+ return getSectionAddress(S.getFragment()->getParent()) +
@@ -118,7 +117,7 @@ uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec,
return OffsetToAlignment(EndAddr, NextSec.getAlignment());
-void MachObjectWriter::WriteHeader(unsigned NumLoadCommands,
+void MachObjectWriter::writeHeader(unsigned NumLoadCommands,
unsigned LoadCommandsSize,
bool SubsectionsViaSymbols) {
uint32_t Flags = 0;
@@ -132,27 +131,27 @@ void MachObjectWriter::WriteHeader(unsigned NumLoadCommands,
uint64_t Start = OS.tell();
(void) Start;
- Write32(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
+ write32(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
- Write32(TargetObjectWriter->getCPUType());
- Write32(TargetObjectWriter->getCPUSubtype());
+ write32(TargetObjectWriter->getCPUType());
+ write32(TargetObjectWriter->getCPUSubtype());
- Write32(MachO::MH_OBJECT);
- Write32(NumLoadCommands);
- Write32(LoadCommandsSize);
- Write32(Flags);
+ write32(MachO::MH_OBJECT);
+ write32(NumLoadCommands);
+ write32(LoadCommandsSize);
+ write32(Flags);
if (is64Bit())
- Write32(0); // reserved
+ write32(0); // reserved
assert(OS.tell() - Start ==
(is64Bit()?sizeof(MachO::mach_header_64): sizeof(MachO::mach_header)));
-/// WriteSegmentLoadCommand - Write a segment load command.
+/// writeSegmentLoadCommand - Write a segment load command.
/// \param NumSections The number of sections in this segment.
/// \param SectionDataSize The total size of the sections.
-void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections,
+void MachObjectWriter::writeSegmentLoadCommand(unsigned NumSections,
uint64_t VMSize,
uint64_t SectionDataStartOffset,
uint64_t SectionDataSize) {
@@ -165,34 +164,34 @@ void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections,
unsigned SegmentLoadCommandSize =
is64Bit() ? sizeof(MachO::segment_command_64):
- Write32(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
- Write32(SegmentLoadCommandSize +
+ write32(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
+ write32(SegmentLoadCommandSize +
NumSections * (is64Bit() ? sizeof(MachO::section_64) :
- WriteBytes("", 16);
+ writeBytes("", 16);
if (is64Bit()) {
- Write64(0); // vmaddr
- Write64(VMSize); // vmsize
- Write64(SectionDataStartOffset); // file offset
- Write64(SectionDataSize); // file size
+ write64(0); // vmaddr
+ write64(VMSize); // vmsize
+ write64(SectionDataStartOffset); // file offset
+ write64(SectionDataSize); // file size
} else {
- Write32(0); // vmaddr
- Write32(VMSize); // vmsize
- Write32(SectionDataStartOffset); // file offset
- Write32(SectionDataSize); // file size
+ write32(0); // vmaddr
+ write32(VMSize); // vmsize
+ write32(SectionDataStartOffset); // file offset
+ write32(SectionDataSize); // file size
// maxprot
// initprot
- Write32(NumSections);
- Write32(0); // flags
+ write32(NumSections);
+ write32(0); // flags
assert(OS.tell() - Start == SegmentLoadCommandSize);
-void MachObjectWriter::WriteSection(const MCAssembler &Asm,
+void MachObjectWriter::writeSection(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCSection &Sec, uint64_t FileOffset,
uint64_t RelocationsStart,
@@ -212,36 +211,36 @@ void MachObjectWriter::WriteSection(const MCAssembler &Asm,
uint64_t Start = OS.tell();
(void) Start;
- WriteBytes(Section.getSectionName(), 16);
- WriteBytes(Section.getSegmentName(), 16);
+ writeBytes(Section.getSectionName(), 16);
+ writeBytes(Section.getSegmentName(), 16);
if (is64Bit()) {
- Write64(getSectionAddress(&Sec)); // address
- Write64(SectionSize); // size
+ write64(getSectionAddress(&Sec)); // address
+ write64(SectionSize); // size
} else {
- Write32(getSectionAddress(&Sec)); // address
- Write32(SectionSize); // size
+ write32(getSectionAddress(&Sec)); // address
+ write32(SectionSize); // size
- Write32(FileOffset);
+ write32(FileOffset);
unsigned Flags = Section.getTypeAndAttributes();
if (Section.hasInstructions())
assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!");
- Write32(Log2_32(Section.getAlignment()));
- Write32(NumRelocations ? RelocationsStart : 0);
- Write32(NumRelocations);
- Write32(Flags);
- Write32(IndirectSymBase.lookup(&Sec)); // reserved1
- Write32(Section.getStubSize()); // reserved2
+ write32(Log2_32(Section.getAlignment()));
+ write32(NumRelocations ? RelocationsStart : 0);
+ write32(NumRelocations);
+ write32(Flags);
+ write32(IndirectSymBase.lookup(&Sec)); // reserved1
+ write32(Section.getStubSize()); // reserved2
if (is64Bit())
- Write32(0); // reserved3
+ write32(0); // reserved3
assert(OS.tell() - Start == (is64Bit() ? sizeof(MachO::section_64) :
-void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset,
+void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
uint32_t NumSymbols,
uint32_t StringTableOffset,
uint32_t StringTableSize) {
@@ -250,17 +249,17 @@ void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset,
uint64_t Start = OS.tell();
(void) Start;
- Write32(MachO::LC_SYMTAB);
- Write32(sizeof(MachO::symtab_command));
- Write32(SymbolOffset);
- Write32(NumSymbols);
- Write32(StringTableOffset);
- Write32(StringTableSize);
+ write32(MachO::LC_SYMTAB);
+ write32(sizeof(MachO::symtab_command));
+ write32(SymbolOffset);
+ write32(NumSymbols);
+ write32(StringTableOffset);
+ write32(StringTableSize);
assert(OS.tell() - Start == sizeof(MachO::symtab_command));
-void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
+void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
uint32_t NumLocalSymbols,
uint32_t FirstExternalSymbol,
uint32_t NumExternalSymbols,
@@ -273,43 +272,37 @@ void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
uint64_t Start = OS.tell();
(void) Start;
- Write32(MachO::LC_DYSYMTAB);
- Write32(sizeof(MachO::dysymtab_command));
- Write32(FirstLocalSymbol);
- Write32(NumLocalSymbols);
- Write32(FirstExternalSymbol);
- Write32(NumExternalSymbols);
- Write32(FirstUndefinedSymbol);
- Write32(NumUndefinedSymbols);
- Write32(0); // tocoff
- Write32(0); // ntoc
- Write32(0); // modtaboff
- Write32(0); // nmodtab
- Write32(0); // extrefsymoff
- Write32(0); // nextrefsyms
- Write32(IndirectSymbolOffset);
- Write32(NumIndirectSymbols);
- Write32(0); // extreloff
- Write32(0); // nextrel
- Write32(0); // locreloff
- Write32(0); // nlocrel
+ write32(MachO::LC_DYSYMTAB);
+ write32(sizeof(MachO::dysymtab_command));
+ write32(FirstLocalSymbol);
+ write32(NumLocalSymbols);
+ write32(FirstExternalSymbol);
+ write32(NumExternalSymbols);
+ write32(FirstUndefinedSymbol);
+ write32(NumUndefinedSymbols);
+ write32(0); // tocoff
+ write32(0); // ntoc
+ write32(0); // modtaboff
+ write32(0); // nmodtab
+ write32(0); // extrefsymoff
+ write32(0); // nextrefsyms
+ write32(IndirectSymbolOffset);
+ write32(NumIndirectSymbols);
+ write32(0); // extreloff
+ write32(0); // nextrel
+ write32(0); // locreloff
+ write32(0); // nlocrel
assert(OS.tell() - Start == sizeof(MachO::dysymtab_command));
MachObjectWriter::MachSymbolData *
MachObjectWriter::findSymbolData(const MCSymbol &Sym) {
- for (auto &Entry : LocalSymbolData)
- if (Entry.Symbol == &Sym)
- return &Entry;
- for (auto &Entry : ExternalSymbolData)
- if (Entry.Symbol == &Sym)
- return &Entry;
- for (auto &Entry : UndefinedSymbolData)
- if (Entry.Symbol == &Sym)
- return &Entry;
+ for (auto *SymbolData :
+ {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
+ for (MachSymbolData &Entry : *SymbolData)
+ if (Entry.Symbol == &Sym)
+ return &Entry;
return nullptr;
@@ -326,14 +319,13 @@ const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const {
return *S;
-void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
+void MachObjectWriter::writeNlist(MachSymbolData &MSD,
const MCAsmLayout &Layout) {
const MCSymbol *Symbol = MSD.Symbol;
- MCSymbolData &Data = Symbol->getData();
+ const MCSymbol &Data = *Symbol;
const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol);
uint8_t SectionIndex = MSD.SectionIndex;
uint8_t Type = 0;
- uint16_t Flags = Data.getFlags();
uint64_t Address = 0;
bool IsAlias = Symbol != AliasedSymbol;
@@ -373,52 +365,37 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
Address = AliaseeInfo->StringIndex;
else if (Symbol->isDefined())
Address = getSymbolAddress(OrigSymbol, Layout);
- else if (Data.isCommon()) {
+ else if (Symbol->isCommon()) {
// Common symbols are encoded with the size in the address
// field, and their alignment in the flags.
- Address = Data.getCommonSize();
- // Common alignment is packed into the 'desc' bits.
- if (unsigned Align = Data.getCommonAlignment()) {
- unsigned Log2Size = Log2_32(Align);
- assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
- if (Log2Size > 15)
- report_fatal_error("invalid 'common' alignment '" +
- Twine(Align) + "' for '" + Symbol->getName() + "'",
- false);
- // FIXME: Keep this mask with the SymbolFlags enumeration.
- Flags = (Flags & 0xF0FF) | (Log2Size << 8);
- }
+ Address = Symbol->getCommonSize();
- if (Layout.getAssembler().isThumbFunc(Symbol))
- Flags |= SF_ThumbFunc;
// struct nlist (12 bytes)
- Write32(MSD.StringIndex);
- Write8(Type);
- Write8(SectionIndex);
+ write32(MSD.StringIndex);
+ write8(Type);
+ write8(SectionIndex);
// The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
// value.
- Write16(Flags);
+ write16(cast<MCSymbolMachO>(Symbol)->getEncodedFlags());
if (is64Bit())
- Write64(Address);
+ write64(Address);
- Write32(Address);
+ write32(Address);
-void MachObjectWriter::WriteLinkeditLoadCommand(uint32_t Type,
+void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type,
uint32_t DataOffset,
uint32_t DataSize) {
uint64_t Start = OS.tell();
(void) Start;
- Write32(Type);
- Write32(sizeof(MachO::linkedit_data_command));
- Write32(DataOffset);
- Write32(DataSize);
+ write32(Type);
+ write32(sizeof(MachO::linkedit_data_command));
+ write32(DataOffset);
+ write32(DataSize);
assert(OS.tell() - Start == sizeof(MachO::linkedit_data_command));
@@ -427,45 +404,44 @@ static unsigned ComputeLinkerOptionsLoadCommandSize(
const std::vector<std::string> &Options, bool is64Bit)
unsigned Size = sizeof(MachO::linker_option_command);
- for (unsigned i = 0, e = Options.size(); i != e; ++i)
- Size += Options[i].size() + 1;
+ for (const std::string &Option : Options)
+ Size += Option.size() + 1;
return RoundUpToAlignment(Size, is64Bit ? 8 : 4);
-void MachObjectWriter::WriteLinkerOptionsLoadCommand(
+void MachObjectWriter::writeLinkerOptionsLoadCommand(
const std::vector<std::string> &Options)
unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
uint64_t Start = OS.tell();
(void) Start;
- Write32(MachO::LC_LINKER_OPTION);
- Write32(Size);
- Write32(Options.size());
+ write32(MachO::LC_LINKER_OPTION);
+ write32(Size);
+ write32(Options.size());
uint64_t BytesWritten = sizeof(MachO::linker_option_command);
- for (unsigned i = 0, e = Options.size(); i != e; ++i) {
+ for (const std::string &Option : Options) {
// Write each string, including the null byte.
- const std::string &Option = Options[i];
- WriteBytes(Option.c_str(), Option.size() + 1);
+ writeBytes(Option.c_str(), Option.size() + 1);
BytesWritten += Option.size() + 1;
// Pad to a multiple of the pointer size.
- WriteBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4));
+ writeBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4));
assert(OS.tell() - Start == Size);
-void MachObjectWriter::RecordRelocation(MCAssembler &Asm,
+void MachObjectWriter::recordRelocation(MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
bool &IsPCRel, uint64_t &FixedValue) {
- TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup,
+ TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup,
Target, FixedValue);
-void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
+void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
// This is the point where 'as' creates actual symbols for indirect symbols
// (in the following two passes). It would be easier for us to do this sooner
// when we see the attribute, but that makes getting the order in the symbol
@@ -500,7 +476,7 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
// Initialize the section indirect symbol base, if necessary.
IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
- Asm.getOrCreateSymbolData(*it->Symbol);
+ Asm.registerSymbol(*it->Symbol);
// Then lazy symbol pointers and symbol stubs.
@@ -520,14 +496,14 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
// FIXME: Do not hardcode.
bool Created;
- MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
+ Asm.registerSymbol(*it->Symbol, &Created);
if (Created)
- Entry.setFlags(Entry.getFlags() | 0x0001);
+ cast<MCSymbolMachO>(it->Symbol)->setReferenceTypeUndefinedLazy(true);
-/// ComputeSymbolTable - Compute the symbol table data
-void MachObjectWriter::ComputeSymbolTable(
+/// computeSymbolTable - Compute the symbol table data
+void MachObjectWriter::computeSymbolTable(
MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
std::vector<MachSymbolData> &ExternalSymbolData,
std::vector<MachSymbolData> &UndefinedSymbolData) {
@@ -554,13 +530,11 @@ void MachObjectWriter::ComputeSymbolTable(
// match 'as'. Even though it doesn't matter for correctness, this is
// important for letting us diff .o files.
for (const MCSymbol &Symbol : Asm.symbols()) {
- MCSymbolData &SD = Symbol.getData();
// Ignore non-linker visible symbols.
if (!Asm.isSymbolLinkerVisible(Symbol))
- if (!SD.isExternal() && !Symbol.isUndefined())
+ if (!Symbol.isExternal() && !Symbol.isUndefined())
MachSymbolData MSD;
@@ -582,13 +556,11 @@ void MachObjectWriter::ComputeSymbolTable(
// Now add the data for local symbols.
for (const MCSymbol &Symbol : Asm.symbols()) {
- MCSymbolData &SD = Symbol.getData();
// Ignore non-linker visible symbols.
if (!Asm.isSymbolLinkerVisible(Symbol))
- if (SD.isExternal() || Symbol.isUndefined())
+ if (Symbol.isExternal() || Symbol.isUndefined())
MachSymbolData MSD;
@@ -611,16 +583,13 @@ void MachObjectWriter::ComputeSymbolTable(
// Set the symbol indices.
Index = 0;
- for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
- LocalSymbolData[i].Symbol->setIndex(Index++);
- for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
- ExternalSymbolData[i].Symbol->setIndex(Index++);
- for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
- UndefinedSymbolData[i].Symbol->setIndex(Index++);
+ for (auto *SymbolData :
+ {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
+ for (MachSymbolData &Entry : *SymbolData)
+ Entry.Symbol->setIndex(Index++);
for (const MCSection &Section : Asm) {
- std::vector<RelAndSymbol> &Relocs = Relocations[&Section];
- for (RelAndSymbol &Rel : Relocs) {
+ for (RelAndSymbol &Rel : Relocations[&Section]) {
if (!Rel.Sym)
@@ -638,9 +607,7 @@ void MachObjectWriter::ComputeSymbolTable(
void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
const MCAsmLayout &Layout) {
uint64_t StartAddress = 0;
- const SmallVectorImpl<MCSection *> &Order = Layout.getSectionOrder();
- for (int i = 0, n = Order.size(); i != n ; ++i) {
- const MCSection *Sec = Order[i];
+ for (const MCSection *Sec : Layout.getSectionOrder()) {
StartAddress = RoundUpToAlignment(StartAddress, Sec->getAlignment());
SectionAddress[Sec] = StartAddress;
StartAddress += Layout.getSectionAddressSize(Sec);
@@ -652,15 +619,15 @@ void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
-void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
computeSectionAddresses(Asm, Layout);
// Create symbol data for any indirect symbols.
- BindIndirectSymbols(Asm);
+ bindIndirectSymbols(Asm);
-bool MachObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
bool InSet, bool IsPCRel) const {
if (InSet)
@@ -692,8 +659,7 @@ bool MachObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
bool hasReliableSymbolDifference = isX86_64();
if (!hasReliableSymbolDifference) {
if (!SA.isInSection() || &SecA != &SecB ||
- (!SA.isTemporary() &&
- FB.getAtom() != Asm.getSymbolData(SA).getFragment()->getAtom() &&
+ (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() &&
return false;
return true;
@@ -708,16 +674,13 @@ bool MachObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
SA.isTemporary() && SA.isInSection() && &SecA == &SecB){
return true;
- } else {
- if (!TargetObjectWriter->useAggressiveSymbolFolding())
- return false;
// If they are not in the same section, we can't compute the diff.
if (&SecA != &SecB)
return false;
- const MCFragment *FA = Asm.getSymbolData(SA).getFragment();
+ const MCFragment *FA = SA.getFragment();
// Bail if the symbol has no fragment.
if (!FA)
@@ -731,10 +694,10 @@ bool MachObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
return false;
-void MachObjectWriter::WriteObject(MCAssembler &Asm,
+void MachObjectWriter::writeObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
// Compute symbol table information and bind symbol indices.
- ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
+ computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
unsigned NumSections = Asm.size();
@@ -779,12 +742,9 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
// Add the linker option load commands sizes.
- const std::vector<std::vector<std::string> > &LinkerOptions =
- Asm.getLinkerOptions();
- for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) {
+ for (const auto &Option : Asm.getLinkerOptions()) {
- LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(LinkerOptions[i],
- is64Bit());
+ LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit());
// Compute the total size of the section data, as well as its file size and vm
@@ -794,9 +754,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
uint64_t SectionDataSize = 0;
uint64_t SectionDataFileSize = 0;
uint64_t VMSize = 0;
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- const MCSection &Sec = *it;
+ for (const MCSection &Sec : Asm) {
uint64_t Address = getSectionAddress(&Sec);
uint64_t Size = Layout.getSectionAddressSize(&Sec);
uint64_t FileSize = Layout.getSectionFileSize(&Sec);
@@ -804,7 +762,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
VMSize = std::max(VMSize, Address + Size);
- if (it->isVirtualSection())
+ if (Sec.isVirtualSection())
SectionDataSize = std::max(SectionDataSize, Address + Size);
@@ -818,19 +776,18 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
SectionDataFileSize += SectionDataPadding;
// Write the prolog, starting with the header and load command...
- WriteHeader(NumLoadCommands, LoadCommandsSize,
+ writeHeader(NumLoadCommands, LoadCommandsSize,
- WriteSegmentLoadCommand(NumSections, VMSize,
+ writeSegmentLoadCommand(NumSections, VMSize,
SectionDataStart, SectionDataSize);
// ... and then the section headers.
uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- std::vector<RelAndSymbol> &Relocs = Relocations[&*it];
+ for (const MCSection &Sec : Asm) {
+ std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
unsigned NumRelocs = Relocs.size();
- uint64_t SectionStart = SectionDataStart + getSectionAddress(&*it);
- WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
+ uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
+ writeSection(Asm, Layout, Sec, SectionStart, RelocTableEnd, NumRelocs);
RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
@@ -841,11 +798,11 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
assert(VersionInfo.Major < 65536 && "unencodable major target version");
uint32_t EncodedVersion = VersionInfo.Update | (VersionInfo.Minor << 8) |
(VersionInfo.Major << 16);
- Write32(VersionInfo.Kind == MCVM_OSXVersionMin ? MachO::LC_VERSION_MIN_MACOSX :
+ write32(VersionInfo.Kind == MCVM_OSXVersionMin ? MachO::LC_VERSION_MIN_MACOSX :
- Write32(sizeof(MachO::version_min_command));
- Write32(EncodedVersion);
- Write32(0); // reserved.
+ write32(sizeof(MachO::version_min_command));
+ write32(EncodedVersion);
+ write32(0); // reserved.
// Write the data-in-code load command, if used.
@@ -853,14 +810,14 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
if (NumDataRegions) {
uint64_t DataRegionsOffset = RelocTableEnd;
uint64_t DataRegionsSize = NumDataRegions * 8;
- WriteLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
+ writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
// Write the loh load command, if used.
uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize;
if (LOHSize)
- WriteLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT,
+ writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT,
DataInCodeTableEnd, LOHSize);
// Write the symbol table load command, if used.
@@ -889,24 +846,21 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
sizeof(MachO::nlist_64) :
- WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
+ writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
- WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
+ writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
FirstExternalSymbol, NumExternalSymbols,
FirstUndefinedSymbol, NumUndefinedSymbols,
IndirectSymbolOffset, NumIndirectSymbols);
// Write the linker options load commands.
- for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) {
- WriteLinkerOptionsLoadCommand(LinkerOptions[i]);
- }
+ for (const auto &Option : Asm.getLinkerOptions())
+ writeLinkerOptionsLoadCommand(Option);
// Write the actual section data.
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- MCSection &Sec = *it;
+ for (const MCSection &Sec : Asm) {
Asm.writeSectionData(&Sec, Layout);
uint64_t Pad = getPaddingSize(&Sec, Layout);
@@ -917,14 +871,13 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
// Write the relocation entries.
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
+ for (const MCSection &Sec : Asm) {
// Write the section relocation entries, in reverse order to match 'as'
// (approximately, the exact algorithm is more complicated than this).
- std::vector<RelAndSymbol> &Relocs = Relocations[&*it];
- for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
- Write32(Relocs[e - i - 1].MRE.r_word0);
- Write32(Relocs[e - i - 1].MRE.r_word1);
+ std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
+ for (const RelAndSymbol &Rel : make_range(Relocs.rbegin(), Relocs.rend())) {
+ write32(Rel.MRE.r_word0);
+ write32(Rel.MRE.r_word1);
@@ -940,9 +893,9 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
<< " end: " << End << "(" << Data->End->getName() << ")"
<< " size: " << End - Start
<< "\n");
- Write32(Start);
- Write16(End - Start);
- Write16(Data->Kind);
+ write32(Start);
+ write16(End - Start);
+ write16(Data->Kind);
// Write out the loh commands, if there is one.
@@ -950,9 +903,9 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
#ifndef NDEBUG
unsigned Start = OS.tell();
- Asm.getLOHContainer().Emit(*this, Layout);
+ Asm.getLOHContainer().emit(*this, Layout);
// Pad to a multiple of the pointer size.
- WriteBytes("", OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4));
+ writeBytes("", OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4));
assert(OS.tell() - Start == LOHSize);
@@ -968,28 +921,25 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
static_cast<const MCSectionMachO &>(*it->Section);
if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) {
// If this symbol is defined and internal, mark it as such.
- if (it->Symbol->isDefined() &&
- !Asm.getSymbolData(*it->Symbol).isExternal()) {
+ if (it->Symbol->isDefined() && !it->Symbol->isExternal()) {
uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL;
if (it->Symbol->isAbsolute())
- Write32(Flags);
+ write32(Flags);
- Write32(it->Symbol->getIndex());
+ write32(it->Symbol->getIndex());
// FIXME: Check that offsets match computed ones.
// Write the symbol table entries.
- for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
- WriteNlist(LocalSymbolData[i], Layout);
- for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
- WriteNlist(ExternalSymbolData[i], Layout);
- for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
- WriteNlist(UndefinedSymbolData[i], Layout);
+ for (auto *SymbolData :
+ {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
+ for (MachSymbolData &Entry : *SymbolData)
+ writeNlist(Entry, Layout);
// Write the string table.
OS <<;
diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index 78006e0..76574e9 100644
--- a/lib/MC/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -12,6 +12,7 @@
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -59,27 +60,6 @@ static void Split(std::vector<std::string> &V, StringRef S) {
V.assign(Tmp.begin(), Tmp.end());
-/// Join a vector of strings to a string with a comma separating each element.
-static std::string Join(const std::vector<std::string> &V) {
- // Start with empty string.
- std::string Result;
- // If the vector is not empty
- if (!V.empty()) {
- // Start with the first feature
- Result = V[0];
- // For each successive feature
- for (size_t i = 1; i < V.size(); i++) {
- // Add a comma
- Result += ",";
- // Add the feature
- Result += V[i];
- }
- }
- // Return the features string
- return Result;
/// Adding features.
void SubtargetFeatures::AddFeature(StringRef String, bool Enable) {
// Don't add empty features.
@@ -144,7 +124,7 @@ SubtargetFeatures::SubtargetFeatures(StringRef Initial) {
std::string SubtargetFeatures::getString() const {
- return Join(Features);
+ return join(Features.begin(), Features.end(), ",");
/// SetImpliedBits - For each feature that is (transitively) implied by this
@@ -210,6 +190,38 @@ SubtargetFeatures::ToggleFeature(FeatureBitset Bits, StringRef Feature,
return Bits;
+SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
+ ArrayRef<SubtargetFeatureKV> FeatureTable) {
+ assert(hasFlag(Feature));
+ // Find feature in table.
+ const SubtargetFeatureKV *FeatureEntry =
+ Find(StripFlag(Feature), FeatureTable);
+ // If there is a match
+ if (FeatureEntry) {
+ // Enable/disable feature in bits
+ if (isEnabled(Feature)) {
+ Bits |= FeatureEntry->Value;
+ // For each feature that this implies, set it.
+ SetImpliedBits(Bits, FeatureEntry, FeatureTable);
+ } else {
+ Bits &= ~FeatureEntry->Value;
+ // For each feature that implies this, clear it.
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable);
+ }
+ } else {
+ errs() << "'" << Feature
+ << "' is not a recognized feature for this target"
+ << " (ignoring feature)\n";
+ }
+ return Bits;
/// getFeatureBits - Get feature bits a CPU.
@@ -265,28 +277,7 @@ SubtargetFeatures::getFeatureBits(StringRef CPU,
if (Feature == "+help")
Help(CPUTable, FeatureTable);
- // Find feature in table.
- const SubtargetFeatureKV *FeatureEntry =
- Find(StripFlag(Feature), FeatureTable);
- // If there is a match
- if (FeatureEntry) {
- // Enable/disable feature in bits
- if (isEnabled(Feature)) {
- Bits |= FeatureEntry->Value;
- // For each feature that this implies, set it.
- SetImpliedBits(Bits, FeatureEntry, FeatureTable);
- } else {
- Bits &= ~FeatureEntry->Value;
- // For each feature that implies this, clear it.
- ClearImpliedBits(Bits, FeatureEntry, FeatureTable);
- }
- } else {
- errs() << "'" << Feature
- << "' is not a recognized feature for this target"
- << " (ignoring feature)\n";
- }
+ Bits = ApplyFeatureFlag(Bits, Feature, FeatureTable);
return Bits;
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index c945085..423c7dc 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -21,10 +21,11 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionCOFF.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolCOFF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/COFF.h"
@@ -76,6 +77,13 @@ public:
void set_name_offset(uint32_t Offset);
bool should_keep() const;
+ int64_t getIndex() const { return Index; }
+ void setIndex(int Value) {
+ Index = Value;
+ if (MC)
+ MC->setIndex(static_cast<uint32_t>(Value));
+ }
// This class contains staging data for a COFF relocation entry.
@@ -161,27 +169,27 @@ public:
void WriteFileHeader(const COFF::header &Header);
void WriteSymbol(const COFFSymbol &S);
void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S);
- void WriteSectionHeader(const COFF::section &S);
+ void writeSectionHeader(const COFF::section &S);
void WriteRelocation(const COFF::relocation &R);
// MCObjectWriter interface implementation.
- void ExecutePostLayoutBinding(MCAssembler &Asm,
+ void executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override;
- bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
const MCSymbol &SymA,
const MCFragment &FB, bool InSet,
bool IsPCRel) const override;
bool isWeak(const MCSymbol &Sym) const override;
- void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+ void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override;
- void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+ void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
@@ -219,6 +227,10 @@ bool COFFSymbol::should_keep() const {
return true;
+ // if this is a safeseh handler, keep it
+ if (MC && (cast<MCSymbolCOFF>(MC)->isSafeSEH()))
+ return true;
// if the section its in is being droped, drop it
if (Section->Number == -1)
return false;
@@ -364,9 +376,8 @@ void WinCOFFObjectWriter::defineSection(MCSectionCOFF const &Sec) {
static uint64_t getSymbolValue(const MCSymbol &Symbol,
const MCAsmLayout &Layout) {
- const MCSymbolData &Data = Symbol.getData();
- if (Data.isCommon() && Data.isExternal())
- return Data.getCommonSize();
+ if (Symbol.isCommon() && Symbol.isExternal())
+ return Symbol.getCommonSize();
uint64_t Res;
if (!Layout.getSymbolOffset(Symbol, Res))
@@ -383,7 +394,7 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &Symbol,
COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol);
SymbolMap[&Symbol] = coff_symbol;
- if (Symbol.getData().getFlags() & COFF::SF_WeakExternal) {
+ if (cast<MCSymbolCOFF>(Symbol).isWeakExternal()) {
coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
if (Symbol.isVariable()) {
@@ -414,17 +425,17 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &Symbol,
coff_symbol->MC = &Symbol;
} else {
- const MCSymbolData &ResSymData = Assembler.getSymbolData(Symbol);
const MCSymbol *Base = Layout.getBaseSymbol(Symbol);
coff_symbol->Data.Value = getSymbolValue(Symbol, Layout);
- coff_symbol->Data.Type = (ResSymData.getFlags() & 0x0000FFFF) >> 0;
- coff_symbol->Data.StorageClass = (ResSymData.getFlags() & 0x00FF0000) >> 16;
+ const MCSymbolCOFF &SymbolCOFF = cast<MCSymbolCOFF>(Symbol);
+ coff_symbol->Data.Type = SymbolCOFF.getType();
+ coff_symbol->Data.StorageClass = SymbolCOFF.getClass();
// If no storage class was specified in the streamer, define it here.
- if (coff_symbol->Data.StorageClass == 0) {
- bool IsExternal = ResSymData.isExternal() ||
- (!ResSymData.getFragment() && !Symbol.isVariable());
+ if (coff_symbol->Data.StorageClass == COFF::IMAGE_SYM_CLASS_NULL) {
+ bool IsExternal = Symbol.isExternal() ||
+ (!Symbol.getFragment() && !Symbol.isVariable());
coff_symbol->Data.StorageClass = IsExternal
@@ -434,9 +445,8 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &Symbol,
if (!Base) {
coff_symbol->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
} else {
- const MCSymbolData &BaseData = Assembler.getSymbolData(*Base);
- if (BaseData.getFragment()) {
- COFFSection *Sec = SectionMap[BaseData.getFragment()->getParent()];
+ if (Base->getFragment()) {
+ COFFSection *Sec = SectionMap[Base->getFragment()->getParent()];
if (coff_symbol->Section && coff_symbol->Section != Sec)
report_fatal_error("conflicting sections for symbol");
@@ -535,40 +545,40 @@ bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) {
if (UseBigObj) {
- WriteLE16(0xFFFF);
- WriteLE16(COFF::BigObjHeader::MinBigObjectVersion);
- WriteLE16(Header.Machine);
- WriteLE32(Header.TimeDateStamp);
- WriteBytes(StringRef(COFF::BigObjMagic, sizeof(COFF::BigObjMagic)));
- WriteLE32(0);
- WriteLE32(0);
- WriteLE32(0);
- WriteLE32(0);
- WriteLE32(Header.NumberOfSections);
- WriteLE32(Header.PointerToSymbolTable);
- WriteLE32(Header.NumberOfSymbols);
+ writeLE16(0xFFFF);
+ writeLE16(COFF::BigObjHeader::MinBigObjectVersion);
+ writeLE16(Header.Machine);
+ writeLE32(Header.TimeDateStamp);
+ writeBytes(StringRef(COFF::BigObjMagic, sizeof(COFF::BigObjMagic)));
+ writeLE32(0);
+ writeLE32(0);
+ writeLE32(0);
+ writeLE32(0);
+ writeLE32(Header.NumberOfSections);
+ writeLE32(Header.PointerToSymbolTable);
+ writeLE32(Header.NumberOfSymbols);
} else {
- WriteLE16(Header.Machine);
- WriteLE16(static_cast<int16_t>(Header.NumberOfSections));
- WriteLE32(Header.TimeDateStamp);
- WriteLE32(Header.PointerToSymbolTable);
- WriteLE32(Header.NumberOfSymbols);
- WriteLE16(Header.SizeOfOptionalHeader);
- WriteLE16(Header.Characteristics);
+ writeLE16(Header.Machine);
+ writeLE16(static_cast<int16_t>(Header.NumberOfSections));
+ writeLE32(Header.TimeDateStamp);
+ writeLE32(Header.PointerToSymbolTable);
+ writeLE32(Header.NumberOfSymbols);
+ writeLE16(Header.SizeOfOptionalHeader);
+ writeLE16(Header.Characteristics);
void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol &S) {
- WriteBytes(StringRef(S.Data.Name, COFF::NameSize));
- WriteLE32(S.Data.Value);
+ writeBytes(StringRef(S.Data.Name, COFF::NameSize));
+ writeLE32(S.Data.Value);
if (UseBigObj)
- WriteLE32(S.Data.SectionNumber);
+ writeLE32(S.Data.SectionNumber);
- WriteLE16(static_cast<int16_t>(S.Data.SectionNumber));
- WriteLE16(S.Data.Type);
- Write8(S.Data.StorageClass);
- Write8(S.Data.NumberOfAuxSymbols);
+ writeLE16(static_cast<int16_t>(S.Data.SectionNumber));
+ writeLE16(S.Data.Type);
+ write8(S.Data.StorageClass);
+ write8(S.Data.NumberOfAuxSymbols);
@@ -578,44 +588,44 @@ void WinCOFFObjectWriter::WriteAuxiliarySymbols(
i != e; ++i) {
switch (i->AuxType) {
case ATFunctionDefinition:
- WriteLE32(i->Aux.FunctionDefinition.TagIndex);
- WriteLE32(i->Aux.FunctionDefinition.TotalSize);
- WriteLE32(i->Aux.FunctionDefinition.PointerToLinenumber);
- WriteLE32(i->Aux.FunctionDefinition.PointerToNextFunction);
+ writeLE32(i->Aux.FunctionDefinition.TagIndex);
+ writeLE32(i->Aux.FunctionDefinition.TotalSize);
+ writeLE32(i->Aux.FunctionDefinition.PointerToLinenumber);
+ writeLE32(i->Aux.FunctionDefinition.PointerToNextFunction);
if (UseBigObj)
WriteZeros(COFF::Symbol32Size - COFF::Symbol16Size);
case ATbfAndefSymbol:
- WriteLE16(i->Aux.bfAndefSymbol.Linenumber);
+ writeLE16(i->Aux.bfAndefSymbol.Linenumber);
- WriteLE32(i->Aux.bfAndefSymbol.PointerToNextFunction);
+ writeLE32(i->Aux.bfAndefSymbol.PointerToNextFunction);
if (UseBigObj)
WriteZeros(COFF::Symbol32Size - COFF::Symbol16Size);
case ATWeakExternal:
- WriteLE32(i->Aux.WeakExternal.TagIndex);
- WriteLE32(i->Aux.WeakExternal.Characteristics);
+ writeLE32(i->Aux.WeakExternal.TagIndex);
+ writeLE32(i->Aux.WeakExternal.Characteristics);
if (UseBigObj)
WriteZeros(COFF::Symbol32Size - COFF::Symbol16Size);
case ATFile:
- WriteBytes(
+ writeBytes(
StringRef(reinterpret_cast<const char *>(&i->Aux),
UseBigObj ? COFF::Symbol32Size : COFF::Symbol16Size));
case ATSectionDefinition:
- WriteLE32(i->Aux.SectionDefinition.Length);
- WriteLE16(i->Aux.SectionDefinition.NumberOfRelocations);
- WriteLE16(i->Aux.SectionDefinition.NumberOfLinenumbers);
- WriteLE32(i->Aux.SectionDefinition.CheckSum);
- WriteLE16(static_cast<int16_t>(i->Aux.SectionDefinition.Number));
- Write8(i->Aux.SectionDefinition.Selection);
+ writeLE32(i->Aux.SectionDefinition.Length);
+ writeLE16(i->Aux.SectionDefinition.NumberOfRelocations);
+ writeLE16(i->Aux.SectionDefinition.NumberOfLinenumbers);
+ writeLE32(i->Aux.SectionDefinition.CheckSum);
+ writeLE16(static_cast<int16_t>(i->Aux.SectionDefinition.Number));
+ write8(i->Aux.SectionDefinition.Selection);
- WriteLE16(static_cast<int16_t>(i->Aux.SectionDefinition.Number >> 16));
+ writeLE16(static_cast<int16_t>(i->Aux.SectionDefinition.Number >> 16));
if (UseBigObj)
WriteZeros(COFF::Symbol32Size - COFF::Symbol16Size);
@@ -623,30 +633,30 @@ void WinCOFFObjectWriter::WriteAuxiliarySymbols(
-void WinCOFFObjectWriter::WriteSectionHeader(const COFF::section &S) {
- WriteBytes(StringRef(S.Name, COFF::NameSize));
- WriteLE32(S.VirtualSize);
- WriteLE32(S.VirtualAddress);
- WriteLE32(S.SizeOfRawData);
- WriteLE32(S.PointerToRawData);
- WriteLE32(S.PointerToRelocations);
- WriteLE32(S.PointerToLineNumbers);
- WriteLE16(S.NumberOfRelocations);
- WriteLE16(S.NumberOfLineNumbers);
- WriteLE32(S.Characteristics);
+void WinCOFFObjectWriter::writeSectionHeader(const COFF::section &S) {
+ writeBytes(StringRef(S.Name, COFF::NameSize));
+ writeLE32(S.VirtualSize);
+ writeLE32(S.VirtualAddress);
+ writeLE32(S.SizeOfRawData);
+ writeLE32(S.PointerToRawData);
+ writeLE32(S.PointerToRelocations);
+ writeLE32(S.PointerToLineNumbers);
+ writeLE16(S.NumberOfRelocations);
+ writeLE16(S.NumberOfLineNumbers);
+ writeLE32(S.Characteristics);
void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) {
- WriteLE32(R.VirtualAddress);
- WriteLE32(R.SymbolTableIndex);
- WriteLE16(R.Type);
+ writeLE32(R.VirtualAddress);
+ writeLE32(R.SymbolTableIndex);
+ writeLE16(R.Type);
// MCObjectWriter interface implementations
-void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
// "Define" each section & symbol. This creates section & symbol
// entries in the staging area.
@@ -658,23 +668,21 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
DefineSymbol(Symbol, Asm, Layout);
-bool WinCOFFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
bool InSet, bool IsPCRel) const {
// MS LINK expects to be able to replace all references to a function with a
// thunk to implement their /INCREMENTAL feature. Make sure we don't optimize
// away any relocations to functions.
- if ((((SymA.getData().getFlags() & COFF::SF_TypeMask) >>
- COFF::SF_TypeShift) >>
+ uint16_t Type = cast<MCSymbolCOFF>(SymA).getType();
return false;
- return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB,
+ return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB,
InSet, IsPCRel);
bool WinCOFFObjectWriter::isWeak(const MCSymbol &Sym) const {
- const MCSymbolData &SD = Sym.getData();
- if (!SD.isExternal())
+ if (!Sym.isExternal())
return false;
if (!Sym.isInSection())
@@ -690,27 +698,25 @@ bool WinCOFFObjectWriter::isWeak(const MCSymbol &Sym) const {
return true;
-void WinCOFFObjectWriter::RecordRelocation(
+void WinCOFFObjectWriter::recordRelocation(
MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) {
assert(Target.getSymA() && "Relocation must reference a symbol!");
const MCSymbol &Symbol = Target.getSymA()->getSymbol();
const MCSymbol &A = Symbol;
- if (!Asm.hasSymbolData(A))
+ if (!A.isRegistered())
Twine("symbol '") + A.getName() +
"' can not be undefined");
- const MCSymbolData &A_SD = Asm.getSymbolData(A);
MCSection *Section = Fragment->getParent();
// Mark this symbol as requiring an entry in the symbol table.
assert(SectionMap.find(Section) != SectionMap.end() &&
- "Section must already have been defined in ExecutePostLayoutBinding!");
+ "Section must already have been defined in executePostLayoutBinding!");
assert(SymbolMap.find(&A) != SymbolMap.end() &&
- "Symbol must already have been defined in ExecutePostLayoutBinding!");
+ "Symbol must already have been defined in executePostLayoutBinding!");
COFFSection *coff_section = SectionMap[Section];
COFFSymbol *coff_symbol = SymbolMap[&A];
@@ -719,14 +725,13 @@ void WinCOFFObjectWriter::RecordRelocation(
if (SymB) {
const MCSymbol *B = &SymB->getSymbol();
- const MCSymbolData &B_SD = Asm.getSymbolData(*B);
- if (!B_SD.getFragment())
+ if (!B->getFragment())
Twine("symbol '") + B->getName() +
"' can not be undefined in a subtraction expression");
- if (!A_SD.getFragment())
+ if (!A.getFragment())
Twine("symbol '") + Symbol.getName() +
@@ -763,9 +768,8 @@ void WinCOFFObjectWriter::RecordRelocation(
// Turn relocations for temporary symbols into section relocations.
if (coff_symbol->MC->isTemporary() || CrossSection) {
Reloc.Symb = coff_symbol->Section->Symbol;
- FixedValue +=
- Layout.getFragmentOffset(coff_symbol->MC->getData().getFragment()) +
- coff_symbol->MC->getData().getOffset();
+ FixedValue += Layout.getFragmentOffset(coff_symbol->MC->getFragment()) +
+ coff_symbol->MC->getOffset();
} else
Reloc.Symb = coff_symbol;
@@ -825,7 +829,7 @@ void WinCOFFObjectWriter::RecordRelocation(
-void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
+void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
size_t SectionsSize = Sections.size();
if (SectionsSize > static_cast<size_t>(INT32_MAX))
@@ -837,13 +841,9 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
UseBigObj = NumberOfSections > COFF::MaxNumberOfSections16;
- DenseMap<COFFSection *, int32_t> SectionIndices(
- NextPowerOf2(NumberOfSections));
// Assign section numbers.
size_t Number = 1;
for (const auto &Section : Sections) {
- SectionIndices[Section.get()] = Number;
Section->Number = Number;
Section->Symbol->Data.SectionNumber = Number;
Section->Symbol->Aux[0].Aux.SectionDefinition.Number = Number;
@@ -853,11 +853,10 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
Header.NumberOfSections = NumberOfSections;
Header.NumberOfSymbols = 0;
- for (auto FI = Asm.file_names_begin(), FE = Asm.file_names_end(); FI != FE;
- ++FI) {
+ for (const std::string &Name : Asm.getFileNames()) {
// round up to calculate the number of auxiliary symbols required
unsigned SymbolSize = UseBigObj ? COFF::Symbol32Size : COFF::Symbol16Size;
- unsigned Count = (FI->size() + SymbolSize - 1) / SymbolSize;
+ unsigned Count = (Name.size() + SymbolSize - 1) / SymbolSize;
COFFSymbol *file = createSymbol(".file");
file->Data.SectionNumber = COFF::IMAGE_SYM_DEBUG;
@@ -865,15 +864,15 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
unsigned Offset = 0;
- unsigned Length = FI->size();
+ unsigned Length = Name.size();
for (auto &Aux : file->Aux) {
Aux.AuxType = ATFile;
if (Length > SymbolSize) {
- memcpy(&Aux.Aux, FI->c_str() + Offset, SymbolSize);
+ memcpy(&Aux.Aux, Name.c_str() + Offset, SymbolSize);
Length = Length - SymbolSize;
} else {
- memcpy(&Aux.Aux, FI->c_str() + Offset, Length);
+ memcpy(&Aux.Aux, Name.c_str() + Offset, Length);
memset((char *)&Aux.Aux + Length, 0, SymbolSize - Length);
@@ -887,12 +886,13 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
if (Symbol->Section)
Symbol->Data.SectionNumber = Symbol->Section->Number;
if (Symbol->should_keep()) {
- Symbol->Index = Header.NumberOfSymbols++;
+ Symbol->setIndex(Header.NumberOfSymbols++);
// Update auxiliary symbol info.
Symbol->Data.NumberOfAuxSymbols = Symbol->Aux.size();
Header.NumberOfSymbols += Symbol->Data.NumberOfAuxSymbols;
- } else
- Symbol->Index = -1;
+ } else {
+ Symbol->setIndex(-1);
+ }
// Build string table.
@@ -914,11 +914,11 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
// Fixup weak external references.
for (auto &Symbol : Symbols) {
if (Symbol->Other) {
- assert(Symbol->Index != -1);
+ assert(Symbol->getIndex() != -1);
assert(Symbol->Aux.size() == 1 && "Symbol must contain one aux symbol!");
assert(Symbol->Aux[0].AuxType == ATWeakExternal &&
"Symbol's aux symbol must be a Weak External!");
- Symbol->Aux[0].Aux.WeakExternal.TagIndex = Symbol->Other->Index;
+ Symbol->Aux[0].Aux.WeakExternal.TagIndex = Symbol->Other->getIndex();
@@ -944,8 +944,7 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
if (Assoc->Number == -1)
- Section->Symbol->Aux[0].Aux.SectionDefinition.Number =
- SectionIndices[Assoc];
+ Section->Symbol->Aux[0].Aux.SectionDefinition.Number = Assoc->Number;
// Assign file offsets to COFF object file structures.
@@ -994,8 +993,8 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
offset += COFF::RelocationSize * Sec->Relocations.size();
for (auto &Relocation : Sec->Relocations) {
- assert(Relocation.Symb->Index != -1);
- Relocation.Data.SymbolTableIndex = Relocation.Symb->Index;
+ assert(Relocation.Symb->getIndex() != -1);
+ Relocation.Data.SymbolTableIndex = Relocation.Symb->getIndex();
@@ -1021,13 +1020,13 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
sections::iterator i, ie;
- MCAssembler::const_iterator j, je;
+ MCAssembler::iterator j, je;
for (auto &Section : Sections) {
if (Section->Number != -1) {
if (Section->Relocations.size() >= 0xffff)
Section->Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
- WriteSectionHeader(Section->Header);
+ writeSectionHeader(Section->Header);
@@ -1077,7 +1076,7 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
"Header::PointerToSymbolTable is insane!");
for (auto &Symbol : Symbols)
- if (Symbol->Index != -1)
+ if (Symbol->getIndex() != -1)
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index d2fbd37..41fc8e4 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -11,7 +11,6 @@
-#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
@@ -23,7 +22,7 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolCOFF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFStreamer.h"
#include "llvm/Support/COFF.h"
@@ -97,17 +96,17 @@ bool MCWinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
Symbol->getSection().getVariant() == MCSection::SV_COFF) &&
"Got non-COFF section in the COFF backend!");
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ getAssembler().registerSymbol(*Symbol);
switch (Attribute) {
default: return false;
case MCSA_WeakReference:
case MCSA_Weak:
- SD.modifyFlags(COFF::SF_WeakExternal, COFF::SF_WeakExternal);
- SD.setExternal(true);
+ cast<MCSymbolCOFF>(Symbol)->setIsWeakExternal();
+ Symbol->setExternal(true);
case MCSA_Global:
- SD.setExternal(true);
+ Symbol->setExternal(true);
@@ -134,11 +133,11 @@ void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
FatalError("storage class specified outside of symbol definition");
if (StorageClass & ~COFF::SSC_Invalid)
- FatalError(Twine("storage class value '") + itostr(StorageClass) +
+ FatalError("storage class value '" + Twine(StorageClass) +
"' out of range");
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*CurSymbol);
- SD.modifyFlags(StorageClass << COFF::SF_ClassShift, COFF::SF_ClassMask);
+ getAssembler().registerSymbol(*CurSymbol);
+ cast<MCSymbolCOFF>(CurSymbol)->setClass((uint16_t)StorageClass);
void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) {
@@ -146,10 +145,10 @@ void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) {
FatalError("symbol type specified outside of a symbol definition");
if (Type & ~0xffff)
- FatalError(Twine("type value '") + itostr(Type) + "' out of range");
+ FatalError("type value '" + Twine(Type) + "' out of range");
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*CurSymbol);
- SD.modifyFlags(Type << COFF::SF_TypeShift, COFF::SF_TypeMask);
+ getAssembler().registerSymbol(*CurSymbol);
+ cast<MCSymbolCOFF>(CurSymbol)->setType((uint16_t)Type);
void MCWinCOFFStreamer::EndCOFFSymbolDef() {
@@ -158,9 +157,30 @@ void MCWinCOFFStreamer::EndCOFFSymbolDef() {
CurSymbol = nullptr;
+void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
+ // SafeSEH is a feature specific to 32-bit x86. It does not exist (and is
+ // unnecessary) on all platforms which use table-based exception dispatch.
+ if (getContext().getObjectFileInfo()->getTargetTriple().getArch() !=
+ Triple::x86)
+ return;
+ if (cast<MCSymbolCOFF>(Symbol)->isSafeSEH())
+ return;
+ MCSection *SXData = getContext().getObjectFileInfo()->getSXDataSection();
+ getAssembler().registerSection(*SXData);
+ if (SXData->getAlignment() < 4)
+ SXData->setAlignment(4);
+ new MCSafeSEHFragment(Symbol, SXData);
+ getAssembler().registerSymbol(*Symbol);
+ cast<MCSymbolCOFF>(Symbol)->setIsSafeSEH();
void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
MCDataFragment *DF = getOrCreateDataFragment();
- const MCSymbolRefExpr *SRE = MCSymbolRefExpr::Create(Symbol, getContext());
+ const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext());
MCFixup Fixup = MCFixup::create(DF->getContents().size(), SRE, FK_SecRel_2);
DF->getContents().resize(DF->getContents().size() + 2, 0);
@@ -168,16 +188,12 @@ void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
void MCWinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
MCDataFragment *DF = getOrCreateDataFragment();
- const MCSymbolRefExpr *SRE = MCSymbolRefExpr::Create(Symbol, getContext());
+ const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext());
MCFixup Fixup = MCFixup::create(DF->getContents().size(), SRE, FK_SecRel_4);
DF->getContents().resize(DF->getContents().size() + 4, 0);
-void MCWinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- llvm_unreachable("not supported");
void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {
assert((!Symbol->isInSection() ||
@@ -195,9 +211,9 @@ void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
AssignSection(Symbol, nullptr);
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- SD.setExternal(true);
- SD.setCommon(Size, ByteAlignment);
+ getAssembler().registerSymbol(*Symbol);
+ Symbol->setExternal(true);
+ Symbol->setCommon(Size, ByteAlignment);
if (!T.isKnownWindowsMSVCEnvironment() && ByteAlignment > 1) {
SmallString<128> Directive;
@@ -224,8 +240,8 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
if (Section->getAlignment() < ByteAlignment)
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- SD.setExternal(false);
+ getAssembler().registerSymbol(*Symbol);
+ Symbol->setExternal(false);
AssignSection(Symbol, Section);
@@ -235,7 +251,7 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
MCFillFragment *Fragment = new MCFillFragment(
/*Value=*/0, /*ValueSize=*/0, Size, Section);
- SD.setFragment(Fragment);
+ Symbol->setFragment(Fragment);
void MCWinCOFFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index fb91eed..54ed954 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -224,7 +224,7 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
child_iterator e = child_end();
if (i == e) {
- ec = object_error::success;
+ ec = std::error_code();
@@ -254,7 +254,7 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
SymbolTable = i;
FirstRegular = i;
- ec = object_error::success;
+ ec = std::error_code();
@@ -298,14 +298,14 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
StringTable = i;
FirstRegular = i;
- ec = object_error::success;
+ ec = std::error_code();
if (Name[0] != '/') {
Format = has64SymTable ? K_MIPS64 : K_GNU;
FirstRegular = i;
- ec = object_error::success;
+ ec = std::error_code();
@@ -320,7 +320,7 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
if (i == e) {
FirstRegular = i;
- ec = object_error::success;
+ ec = std::error_code();
@@ -332,7 +332,7 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
FirstRegular = i;
- ec = object_error::success;
+ ec = std::error_code();
Archive::child_iterator Archive::child_begin(bool SkipInternal) const {
diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp
new file mode 100644
index 0000000..90a736f
--- /dev/null
+++ b/lib/Object/ArchiveWriter.cpp
@@ -0,0 +1,342 @@
+//===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file defines the writeArchive function.
+#include "llvm/Object/ArchiveWriter.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/raw_ostream.h"
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#include <io.h>
+using namespace llvm;
+NewArchiveIterator::NewArchiveIterator() {}
+NewArchiveIterator::NewArchiveIterator(object::Archive::child_iterator I,
+ StringRef Name)
+ : IsNewMember(false), Name(Name), OldI(I) {}
+NewArchiveIterator::NewArchiveIterator(StringRef NewFilename, StringRef Name)
+ : IsNewMember(true), Name(Name), NewFilename(NewFilename) {}
+StringRef NewArchiveIterator::getName() const { return Name; }
+bool NewArchiveIterator::isNewMember() const { return IsNewMember; }
+object::Archive::child_iterator NewArchiveIterator::getOld() const {
+ assert(!IsNewMember);
+ return OldI;
+StringRef NewArchiveIterator::getNew() const {
+ assert(IsNewMember);
+ return NewFilename;
+NewArchiveIterator::getFD(sys::fs::file_status &NewStatus) const {
+ assert(IsNewMember);
+ int NewFD;
+ if (auto EC = sys::fs::openFileForRead(NewFilename, NewFD))
+ return EC;
+ assert(NewFD != -1);
+ if (auto EC = sys::fs::status(NewFD, NewStatus))
+ return EC;
+ // Opening a directory doesn't make sense. Let it fail.
+ // Linux cannot open directories with open(2), although
+ // cygwin and *bsd can.
+ if (NewStatus.type() == sys::fs::file_type::directory_file)
+ return make_error_code(std::errc::is_a_directory);
+ return NewFD;
+template <typename T>
+static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size,
+ bool MayTruncate = false) {
+ uint64_t OldPos = OS.tell();
+ OS << Data;
+ unsigned SizeSoFar = OS.tell() - OldPos;
+ if (Size > SizeSoFar) {
+ unsigned Remaining = Size - SizeSoFar;
+ for (unsigned I = 0; I < Remaining; ++I)
+ OS << ' ';
+ } else if (Size < SizeSoFar) {
+ assert(MayTruncate && "Data doesn't fit in Size");
+ // Some of the data this is used for (like UID) can be larger than the
+ // space available in the archive format. Truncate in that case.
+ + Size);
+ }
+static void print32BE(raw_fd_ostream &Out, unsigned Val) {
+ // FIXME: Should use Endian.h here.
+ for (int I = 3; I >= 0; --I) {
+ char V = (Val >> (8 * I)) & 0xff;
+ Out << V;
+ }
+static void printRestOfMemberHeader(raw_fd_ostream &Out,
+ const sys::TimeValue &ModTime, unsigned UID,
+ unsigned GID, unsigned Perms,
+ unsigned Size) {
+ printWithSpacePadding(Out, ModTime.toEpochTime(), 12);
+ printWithSpacePadding(Out, UID, 6, true);
+ printWithSpacePadding(Out, GID, 6, true);
+ printWithSpacePadding(Out, format("%o", Perms), 8);
+ printWithSpacePadding(Out, Size, 10);
+ Out << "`\n";
+static void printMemberHeader(raw_fd_ostream &Out, StringRef Name,
+ const sys::TimeValue &ModTime, unsigned UID,
+ unsigned GID, unsigned Perms, unsigned Size) {
+ printWithSpacePadding(Out, Twine(Name) + "/", 16);
+ printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size);
+static void printMemberHeader(raw_fd_ostream &Out, unsigned NameOffset,
+ const sys::TimeValue &ModTime, unsigned UID,
+ unsigned GID, unsigned Perms, unsigned Size) {
+ Out << '/';
+ printWithSpacePadding(Out, NameOffset, 15);
+ printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size);
+static void writeStringTable(raw_fd_ostream &Out,
+ ArrayRef<NewArchiveIterator> Members,
+ std::vector<unsigned> &StringMapIndexes) {
+ unsigned StartOffset = 0;
+ for (ArrayRef<NewArchiveIterator>::iterator I = Members.begin(),
+ E = Members.end();
+ I != E; ++I) {
+ StringRef Name = I->getName();
+ if (Name.size() < 16)
+ continue;
+ if (StartOffset == 0) {
+ printWithSpacePadding(Out, "//", 58);
+ Out << "`\n";
+ StartOffset = Out.tell();
+ }
+ StringMapIndexes.push_back(Out.tell() - StartOffset);
+ Out << Name << "/\n";
+ }
+ if (StartOffset == 0)
+ return;
+ if (Out.tell() % 2)
+ Out << '\n';
+ int Pos = Out.tell();
+ - 12);
+ printWithSpacePadding(Out, Pos - StartOffset, 10);
+// Returns the offset of the first reference to a member offset.
+static ErrorOr<unsigned>
+writeSymbolTable(raw_fd_ostream &Out, ArrayRef<NewArchiveIterator> Members,
+ ArrayRef<MemoryBufferRef> Buffers,
+ std::vector<unsigned> &MemberOffsetRefs) {
+ unsigned StartOffset = 0;
+ unsigned MemberNum = 0;
+ std::string NameBuf;
+ raw_string_ostream NameOS(NameBuf);
+ unsigned NumSyms = 0;
+ LLVMContext Context;
+ for (ArrayRef<NewArchiveIterator>::iterator I = Members.begin(),
+ E = Members.end();
+ I != E; ++I, ++MemberNum) {
+ MemoryBufferRef MemberBuffer = Buffers[MemberNum];
+ ErrorOr<std::unique_ptr<object::SymbolicFile>> ObjOrErr =
+ object::SymbolicFile::createSymbolicFile(
+ MemberBuffer, sys::fs::file_magic::unknown, &Context);
+ if (!ObjOrErr)
+ continue; // FIXME: check only for "not an object file" errors.
+ object::SymbolicFile &Obj = *ObjOrErr.get();
+ if (!StartOffset) {
+ printMemberHeader(Out, "", sys::TimeValue::now(), 0, 0, 0, 0);
+ StartOffset = Out.tell();
+ print32BE(Out, 0);
+ }
+ for (const object::BasicSymbolRef &S : Obj.symbols()) {
+ uint32_t Symflags = S.getFlags();
+ if (Symflags & object::SymbolRef::SF_FormatSpecific)
+ continue;
+ if (!(Symflags & object::SymbolRef::SF_Global))
+ continue;
+ if (Symflags & object::SymbolRef::SF_Undefined)
+ continue;
+ if (auto EC = S.printName(NameOS))
+ return EC;
+ NameOS << '\0';
+ ++NumSyms;
+ MemberOffsetRefs.push_back(MemberNum);
+ print32BE(Out, 0);
+ }
+ }
+ Out << NameOS.str();
+ if (StartOffset == 0)
+ return 0;
+ if (Out.tell() % 2)
+ Out << '\0';
+ unsigned Pos = Out.tell();
+ - 12);
+ printWithSpacePadding(Out, Pos - StartOffset, 10);
+ print32BE(Out, NumSyms);
+ return StartOffset + 4;
+std::pair<StringRef, std::error_code>
+llvm::writeArchive(StringRef ArcName,
+ std::vector<NewArchiveIterator> &NewMembers,
+ bool WriteSymtab) {
+ SmallString<128> TmpArchive;
+ int TmpArchiveFD;
+ if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a",
+ TmpArchiveFD, TmpArchive))
+ return std::make_pair(ArcName, EC);
+ tool_output_file Output(TmpArchive, TmpArchiveFD);
+ raw_fd_ostream &Out = Output.os();
+ Out << "!<arch>\n";
+ std::vector<unsigned> MemberOffsetRefs;
+ std::vector<std::unique_ptr<MemoryBuffer>> Buffers;
+ std::vector<MemoryBufferRef> Members;
+ std::vector<sys::fs::file_status> NewMemberStatus;
+ for (unsigned I = 0, N = NewMembers.size(); I < N; ++I) {
+ NewArchiveIterator &Member = NewMembers[I];
+ MemoryBufferRef MemberRef;
+ if (Member.isNewMember()) {
+ StringRef Filename = Member.getNew();
+ NewMemberStatus.resize(NewMemberStatus.size() + 1);
+ sys::fs::file_status &Status = NewMemberStatus.back();
+ ErrorOr<int> FD = Member.getFD(Status);
+ if (auto EC = FD.getError())
+ return std::make_pair(Filename, EC);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MemberBufferOrErr =
+ MemoryBuffer::getOpenFile(FD.get(), Filename, Status.getSize(),
+ false);
+ if (auto EC = MemberBufferOrErr.getError())
+ return std::make_pair(Filename, EC);
+ if (close(FD.get()) != 0)
+ return std::make_pair(Filename,
+ std::error_code(errno, std::generic_category()));
+ Buffers.push_back(std::move(MemberBufferOrErr.get()));
+ MemberRef = Buffers.back()->getMemBufferRef();
+ } else {
+ object::Archive::child_iterator OldMember = Member.getOld();
+ ErrorOr<MemoryBufferRef> MemberBufferOrErr =
+ OldMember->getMemoryBufferRef();
+ if (auto EC = MemberBufferOrErr.getError())
+ return std::make_pair("", EC);
+ MemberRef = MemberBufferOrErr.get();
+ }
+ Members.push_back(MemberRef);
+ }
+ unsigned MemberReferenceOffset = 0;
+ if (WriteSymtab) {
+ ErrorOr<unsigned> MemberReferenceOffsetOrErr =
+ writeSymbolTable(Out, NewMembers, Members, MemberOffsetRefs);
+ if (auto EC = MemberReferenceOffsetOrErr.getError())
+ return std::make_pair(ArcName, EC);
+ MemberReferenceOffset = MemberReferenceOffsetOrErr.get();
+ }
+ std::vector<unsigned> StringMapIndexes;
+ writeStringTable(Out, NewMembers, StringMapIndexes);
+ unsigned MemberNum = 0;
+ unsigned LongNameMemberNum = 0;
+ unsigned NewMemberNum = 0;
+ std::vector<unsigned> MemberOffset;
+ for (std::vector<NewArchiveIterator>::iterator I = NewMembers.begin(),
+ E = NewMembers.end();
+ I != E; ++I, ++MemberNum) {
+ unsigned Pos = Out.tell();
+ MemberOffset.push_back(Pos);
+ MemoryBufferRef File = Members[MemberNum];
+ if (I->isNewMember()) {
+ StringRef FileName = I->getNew();
+ const sys::fs::file_status &Status = NewMemberStatus[NewMemberNum];
+ NewMemberNum++;
+ StringRef Name = sys::path::filename(FileName);
+ if (Name.size() < 16)
+ printMemberHeader(Out, Name, Status.getLastModificationTime(),
+ Status.getUser(), Status.getGroup(),
+ Status.permissions(), Status.getSize());
+ else
+ printMemberHeader(Out, StringMapIndexes[LongNameMemberNum++],
+ Status.getLastModificationTime(), Status.getUser(),
+ Status.getGroup(), Status.permissions(),
+ Status.getSize());
+ } else {
+ object::Archive::child_iterator OldMember = I->getOld();
+ StringRef Name = I->getName();
+ if (Name.size() < 16)
+ printMemberHeader(Out, Name, OldMember->getLastModified(),
+ OldMember->getUID(), OldMember->getGID(),
+ OldMember->getAccessMode(), OldMember->getSize());
+ else
+ printMemberHeader(Out, StringMapIndexes[LongNameMemberNum++],
+ OldMember->getLastModified(), OldMember->getUID(),
+ OldMember->getGID(), OldMember->getAccessMode(),
+ OldMember->getSize());
+ }
+ Out << File.getBuffer();
+ if (Out.tell() % 2)
+ Out << '\n';
+ }
+ if (MemberReferenceOffset) {
+ for (unsigned MemberNum : MemberOffsetRefs)
+ print32BE(Out, MemberOffset[MemberNum]);
+ }
+ Output.keep();
+ Out.close();
+ sys::fs::rename(TmpArchive, ArcName);
+ return std::make_pair("", std::error_code());
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 37add22..17aac8b 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -1,5 +1,6 @@
+ ArchiveWriter.cpp
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 74709c8..1055b98 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -46,7 +46,7 @@ static std::error_code checkOffset(MemoryBufferRef M, uintptr_t Addr,
Addr < uintptr_t(M.getBufferStart())) {
return object_error::unexpected_eof;
- return object_error::success;
+ return std::error_code();
// Sets Obj unless any bytes in [addr, addr + size) fall outsize of m.
@@ -59,7 +59,7 @@ static std::error_code getObject(const T *&Obj, MemoryBufferRef M,
if (std::error_code EC = checkOffset(M, Addr, Size))
return EC;
Obj = reinterpret_cast<const T *>(Addr);
- return object_error::success;
+ return std::error_code();
// Decode a string table entry in base 64 (//AAAAAA). Expects \arg Str without
@@ -156,11 +156,11 @@ std::error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref,
if (Symb.isAnyUndefined()) {
Result = UnknownAddressOrSize;
- return object_error::success;
+ return std::error_code();
if (Symb.isCommon()) {
Result = UnknownAddressOrSize;
- return object_error::success;
+ return std::error_code();
int32_t SectionNumber = Symb.getSectionNumber();
if (!COFF::isReservedSectionNumber(SectionNumber)) {
@@ -169,11 +169,11 @@ std::error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref,
return EC;
Result = Section->VirtualAddress + Symb.getValue();
- return object_error::success;
+ return std::error_code();
Result = Symb.getValue();
- return object_error::success;
+ return std::error_code();
std::error_code COFFObjectFile::getSymbolType(DataRefImpl Ref,
@@ -205,7 +205,7 @@ std::error_code COFFObjectFile::getSymbolType(DataRefImpl Ref,
Result = SymbolRef::ST_Data;
- return object_error::success;
+ return std::error_code();
uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const {
@@ -236,16 +236,12 @@ uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const {
return Result;
-std::error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref,
- uint64_t &Result) const {
+uint64_t COFFObjectFile::getSymbolSize(DataRefImpl Ref) const {
COFFSymbolRef Symb = getCOFFSymbol(Ref);
if (Symb.isCommon())
- Result = Symb.getValue();
- else
- Result = UnknownAddressOrSize;
- return object_error::success;
+ return Symb.getValue();
+ return UnknownAddressOrSize;
@@ -262,7 +258,7 @@ COFFObjectFile::getSymbolSection(DataRefImpl Ref,
Ref.p = reinterpret_cast<uintptr_t>(Sec);
Result = section_iterator(SectionRef(Ref, this));
- return object_error::success;
+ return std::error_code();
void COFFObjectFile::moveSectionNext(DataRefImpl &Ref) const {
@@ -421,7 +417,7 @@ std::error_code COFFObjectFile::initSymbolTablePtr() {
// Check that the string table is null terminated if has any in it.
if (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)
return object_error::parse_failed;
- return object_error::success;
+ return std::error_code();
// Returns the file offset for the given VA.
@@ -442,7 +438,7 @@ std::error_code COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const {
if (SectionStart <= Addr && Addr < SectionEnd) {
uint32_t Offset = Addr - SectionStart;
Res = uintptr_t(base()) + Section->PointerToRawData + Offset;
- return object_error::success;
+ return std::error_code();
return object_error::parse_failed;
@@ -458,7 +454,7 @@ std::error_code COFFObjectFile::getHintName(uint32_t Rva, uint16_t &Hint,
const uint8_t *Ptr = reinterpret_cast<const uint8_t *>(IntPtr);
Hint = *reinterpret_cast<const ulittle16_t *>(Ptr);
Name = StringRef(reinterpret_cast<const char *>(Ptr + 2));
- return object_error::success;
+ return std::error_code();
// Find the import table.
@@ -467,11 +463,11 @@ std::error_code COFFObjectFile::initImportTablePtr() {
// the import table, do nothing.
const data_directory *DataEntry;
if (getDataDirectory(COFF::IMPORT_TABLE, DataEntry))
- return object_error::success;
+ return std::error_code();
// Do nothing if the pointer to import table is NULL.
if (DataEntry->RelativeVirtualAddress == 0)
- return object_error::success;
+ return std::error_code();
uint32_t ImportTableRva = DataEntry->RelativeVirtualAddress;
// -1 because the last entry is the null entry.
@@ -485,16 +481,16 @@ std::error_code COFFObjectFile::initImportTablePtr() {
return EC;
ImportDirectory = reinterpret_cast<
const import_directory_table_entry *>(IntPtr);
- return object_error::success;
+ return std::error_code();
// Initializes DelayImportDirectory and NumberOfDelayImportDirectory.
std::error_code COFFObjectFile::initDelayImportTablePtr() {
const data_directory *DataEntry;
if (getDataDirectory(COFF::DELAY_IMPORT_DESCRIPTOR, DataEntry))
- return object_error::success;
+ return std::error_code();
if (DataEntry->RelativeVirtualAddress == 0)
- return object_error::success;
+ return std::error_code();
uint32_t RVA = DataEntry->RelativeVirtualAddress;
NumberOfDelayImportDirectory = DataEntry->Size /
@@ -505,7 +501,7 @@ std::error_code COFFObjectFile::initDelayImportTablePtr() {
return EC;
DelayImportDirectory = reinterpret_cast<
const delay_import_directory_table_entry *>(IntPtr);
- return object_error::success;
+ return std::error_code();
// Find the export table.
@@ -514,11 +510,11 @@ std::error_code COFFObjectFile::initExportTablePtr() {
// the export table, do nothing.
const data_directory *DataEntry;
if (getDataDirectory(COFF::EXPORT_TABLE, DataEntry))
- return object_error::success;
+ return std::error_code();
// Do nothing if the pointer to export table is NULL.
if (DataEntry->RelativeVirtualAddress == 0)
- return object_error::success;
+ return std::error_code();
uint32_t ExportTableRva = DataEntry->RelativeVirtualAddress;
uintptr_t IntPtr = 0;
@@ -526,15 +522,15 @@ std::error_code COFFObjectFile::initExportTablePtr() {
return EC;
ExportDirectory =
reinterpret_cast<const export_directory_table_entry *>(IntPtr);
- return object_error::success;
+ return std::error_code();
std::error_code COFFObjectFile::initBaseRelocPtr() {
const data_directory *DataEntry;
if (getDataDirectory(COFF::BASE_RELOCATION_TABLE, DataEntry))
- return object_error::success;
+ return std::error_code();
if (DataEntry->RelativeVirtualAddress == 0)
- return object_error::success;
+ return std::error_code();
uintptr_t IntPtr = 0;
if (std::error_code EC = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr))
@@ -543,7 +539,7 @@ std::error_code COFFObjectFile::initBaseRelocPtr() {
BaseRelocEnd = reinterpret_cast<coff_base_reloc_block_header *>(
IntPtr + DataEntry->Size);
- return object_error::success;
+ return std::error_code();
COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC)
@@ -608,7 +604,7 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC)
if (COFFHeader) {
// The prior checkSize call may have failed. This isn't a hard error
// because we were just trying to sniff out bigobj.
- EC = object_error::success;
+ EC = std::error_code();
CurPtr += sizeof(coff_file_header);
if (COFFHeader->isImportLibrary())
@@ -670,7 +666,7 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC)
if ((EC = initBaseRelocPtr()))
- EC = object_error::success;
+ EC = std::error_code();
basic_symbol_iterator COFFObjectFile::symbol_begin_impl() const {
@@ -796,13 +792,13 @@ iterator_range<base_reloc_iterator> COFFObjectFile::base_relocs() const {
std::error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const {
Res = PE32Header;
- return object_error::success;
+ return std::error_code();
COFFObjectFile::getPE32PlusHeader(const pe32plus_header *&Res) const {
Res = PE32PlusHeader;
- return object_error::success;
+ return std::error_code();
@@ -821,18 +817,18 @@ COFFObjectFile::getDataDirectory(uint32_t Index,
return object_error::parse_failed;
Res = &DataDirectory[Index];
- return object_error::success;
+ return std::error_code();
std::error_code COFFObjectFile::getSection(int32_t Index,
const coff_section *&Result) const {
Result = nullptr;
if (COFF::isReservedSectionNumber(Index))
- return object_error::success;
+ return std::error_code();
if (static_cast<uint32_t>(Index) <= getNumberOfSections()) {
// We already verified the section table data, so no need to check again.
Result = SectionTable + (Index - 1);
- return object_error::success;
+ return std::error_code();
return object_error::parse_failed;
@@ -845,7 +841,7 @@ std::error_code COFFObjectFile::getString(uint32_t Offset,
if (Offset >= StringTableSize)
return object_error::unexpected_eof;
Result = StringRef(StringTable + Offset);
- return object_error::success;
+ return std::error_code();
std::error_code COFFObjectFile::getSymbolName(COFFSymbolRef Symbol,
@@ -855,7 +851,7 @@ std::error_code COFFObjectFile::getSymbolName(COFFSymbolRef Symbol,
uint32_t Offset = Symbol.getStringTableOffset().Offset;
if (std::error_code EC = getString(Offset, Res))
return EC;
- return object_error::success;
+ return std::error_code();
if (Symbol.getShortName()[COFF::NameSize - 1] == 0)
@@ -864,7 +860,7 @@ std::error_code COFFObjectFile::getSymbolName(COFFSymbolRef Symbol,
// Not null terminated, use all 8 bytes.
Res = StringRef(Symbol.getShortName(), COFF::NameSize);
- return object_error::success;
+ return std::error_code();
@@ -915,7 +911,7 @@ std::error_code COFFObjectFile::getSectionName(const coff_section *Sec,
Res = Name;
- return object_error::success;
+ return std::error_code();
uint64_t COFFObjectFile::getSectionSize(const coff_section *Sec) const {
@@ -953,7 +949,7 @@ COFFObjectFile::getSectionContents(const coff_section *Sec,
if (checkOffset(Data, ConStart, SectionSize))
return object_error::parse_failed;
Res = makeArrayRef(reinterpret_cast<const uint8_t *>(ConStart), SectionSize);
- return object_error::success;
+ return std::error_code();
const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const {
@@ -978,7 +974,7 @@ std::error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel,
getObject(VirtualAddressPtr, Data, &R->VirtualAddress))
return EC;
Res = *VirtualAddressPtr;
- return object_error::success;
+ return std::error_code();
symbol_iterator COFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
@@ -1012,7 +1008,7 @@ std::error_code COFFObjectFile::getRelocationType(DataRefImpl Rel,
uint64_t &Res) const {
const coff_relocation* R = toRel(Rel);
Res = R->Type;
- return object_error::success;
+ return std::error_code();
const coff_section *
@@ -1113,27 +1109,11 @@ COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
Res = "Unknown";
Result.append(Res.begin(), Res.end());
- return object_error::success;
+ return std::error_code();
-COFFObjectFile::getRelocationValueString(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const {
- const coff_relocation *Reloc = toRel(Rel);
- DataRefImpl Sym;
- ErrorOr<COFFSymbolRef> Symb = getSymbol(Reloc->SymbolTableIndex);
- if (std::error_code EC = Symb.getError())
- return EC;
- Sym.p = reinterpret_cast<uintptr_t>(Symb->getRawPtr());
- StringRef SymName;
- if (std::error_code EC = getSymbolName(Sym, SymName))
- return EC;
- Result.append(SymName.begin(), SymName.end());
- return object_error::success;
bool COFFObjectFile::isRelocatableObject() const {
return !DataDirectory;
@@ -1150,7 +1130,7 @@ void ImportDirectoryEntryRef::moveNext() {
std::error_code ImportDirectoryEntryRef::getImportTableEntry(
const import_directory_table_entry *&Result) const {
Result = ImportTable + Index;
- return object_error::success;
+ return std::error_code();
static imported_symbol_iterator
@@ -1212,19 +1192,19 @@ std::error_code ImportDirectoryEntryRef::getName(StringRef &Result) const {
OwningObject->getRvaPtr(ImportTable[Index].NameRVA, IntPtr))
return EC;
Result = StringRef(reinterpret_cast<const char *>(IntPtr));
- return object_error::success;
+ return std::error_code();
ImportDirectoryEntryRef::getImportLookupTableRVA(uint32_t &Result) const {
Result = ImportTable[Index].ImportLookupTableRVA;
- return object_error::success;
+ return std::error_code();
ImportDirectoryEntryRef::getImportAddressTableRVA(uint32_t &Result) const {
Result = ImportTable[Index].ImportAddressTableRVA;
- return object_error::success;
+ return std::error_code();
std::error_code ImportDirectoryEntryRef::getImportLookupEntry(
@@ -1234,7 +1214,7 @@ std::error_code ImportDirectoryEntryRef::getImportLookupEntry(
if (std::error_code EC = OwningObject->getRvaPtr(RVA, IntPtr))
return EC;
Result = reinterpret_cast<const import_lookup_table_entry32 *>(IntPtr);
- return object_error::success;
+ return std::error_code();
bool DelayImportDirectoryEntryRef::
@@ -1268,13 +1248,13 @@ std::error_code DelayImportDirectoryEntryRef::getName(StringRef &Result) const {
if (std::error_code EC = OwningObject->getRvaPtr(Table[Index].Name, IntPtr))
return EC;
Result = StringRef(reinterpret_cast<const char *>(IntPtr));
- return object_error::success;
+ return std::error_code();
std::error_code DelayImportDirectoryEntryRef::
getDelayImportTable(const delay_import_directory_table_entry *&Result) const {
Result = Table;
- return object_error::success;
+ return std::error_code();
std::error_code DelayImportDirectoryEntryRef::
@@ -1288,7 +1268,7 @@ getImportAddress(int AddrIndex, uint64_t &Result) const {
Result = *reinterpret_cast<const ulittle64_t *>(IntPtr);
Result = *reinterpret_cast<const ulittle32_t *>(IntPtr);
- return object_error::success;
+ return std::error_code();
bool ExportDirectoryEntryRef::
@@ -1308,20 +1288,20 @@ std::error_code ExportDirectoryEntryRef::getDllName(StringRef &Result) const {
OwningObject->getRvaPtr(ExportTable->NameRVA, IntPtr))
return EC;
Result = StringRef(reinterpret_cast<const char *>(IntPtr));
- return object_error::success;
+ return std::error_code();
// Returns the starting ordinal number.
ExportDirectoryEntryRef::getOrdinalBase(uint32_t &Result) const {
Result = ExportTable->OrdinalBase;
- return object_error::success;
+ return std::error_code();
// Returns the export ordinal of the current export symbol.
std::error_code ExportDirectoryEntryRef::getOrdinal(uint32_t &Result) const {
Result = ExportTable->OrdinalBase + Index;
- return object_error::success;
+ return std::error_code();
// Returns the address of the current export symbol.
@@ -1333,7 +1313,7 @@ std::error_code ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const {
const export_address_table_entry *entry =
reinterpret_cast<const export_address_table_entry *>(IntPtr);
Result = entry[Index].ExportRVA;
- return object_error::success;
+ return std::error_code();
// Returns the name of the current export symbol. If the symbol is exported only
@@ -1359,10 +1339,10 @@ ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const {
if (std::error_code EC = OwningObject->getRvaPtr(NamePtr[Offset], IntPtr))
return EC;
Result = StringRef(reinterpret_cast<const char *>(IntPtr));
- return object_error::success;
+ return std::error_code();
Result = "";
- return object_error::success;
+ return std::error_code();
bool ImportedSymbolRef::
@@ -1381,11 +1361,11 @@ ImportedSymbolRef::getSymbolName(StringRef &Result) const {
if (Entry32) {
// If a symbol is imported only by ordinal, it has no name.
if (Entry32[Index].isOrdinal())
- return object_error::success;
+ return std::error_code();
RVA = Entry32[Index].getHintNameRVA();
} else {
if (Entry64[Index].isOrdinal())
- return object_error::success;
+ return std::error_code();
RVA = Entry64[Index].getHintNameRVA();
uintptr_t IntPtr = 0;
@@ -1393,7 +1373,7 @@ ImportedSymbolRef::getSymbolName(StringRef &Result) const {
return EC;
// +2 because the first two bytes is hint.
Result = StringRef(reinterpret_cast<const char *>(IntPtr + 2));
- return object_error::success;
+ return std::error_code();
std::error_code ImportedSymbolRef::getOrdinal(uint16_t &Result) const {
@@ -1401,13 +1381,13 @@ std::error_code ImportedSymbolRef::getOrdinal(uint16_t &Result) const {
if (Entry32) {
if (Entry32[Index].isOrdinal()) {
Result = Entry32[Index].getOrdinal();
- return object_error::success;
+ return std::error_code();
RVA = Entry32[Index].getHintNameRVA();
} else {
if (Entry64[Index].isOrdinal()) {
Result = Entry64[Index].getOrdinal();
- return object_error::success;
+ return std::error_code();
RVA = Entry64[Index].getHintNameRVA();
@@ -1415,7 +1395,7 @@ std::error_code ImportedSymbolRef::getOrdinal(uint16_t &Result) const {
if (std::error_code EC = OwningObject->getRvaPtr(RVA, IntPtr))
return EC;
Result = *reinterpret_cast<const ulittle16_t *>(IntPtr);
- return object_error::success;
+ return std::error_code();
@@ -1452,11 +1432,11 @@ void BaseRelocRef::moveNext() {
std::error_code BaseRelocRef::getType(uint8_t &Type) const {
auto *Entry = reinterpret_cast<const coff_base_reloc_block_entry *>(Header + 1);
Type = Entry[Index].getType();
- return object_error::success;
+ return std::error_code();
std::error_code BaseRelocRef::getRVA(uint32_t &Result) const {
auto *Entry = reinterpret_cast<const coff_base_reloc_block_entry *>(Header + 1);
Result = Header->PageRVA + Entry[Index].getOffset();
- return object_error::success;
+ return std::error_code();
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index 8ccb253..c7df30a 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -27,51 +27,28 @@ ObjectFile::createELFObjectFile(MemoryBufferRef Obj) {
std::size_t MaxAlignment =
1ULL << countTrailingZeros(uintptr_t(Obj.getBufferStart()));
+ if (MaxAlignment < 2)
+ return object_error::parse_failed;
std::error_code EC;
std::unique_ptr<ObjectFile> R;
- if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
- if (MaxAlignment >= 4)
- R.reset(new ELFObjectFile<ELFType<support::little, 4, false>>(Obj, EC));
- else
- if (MaxAlignment >= 2)
- R.reset(new ELFObjectFile<ELFType<support::little, 2, false>>(Obj, EC));
- else
- return object_error::parse_failed;
- else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
- if (MaxAlignment >= 4)
- R.reset(new ELFObjectFile<ELFType<support::big, 4, false>>(Obj, EC));
- else
- if (MaxAlignment >= 2)
- R.reset(new ELFObjectFile<ELFType<support::big, 2, false>>(Obj, EC));
+ if (Ident.first == ELF::ELFCLASS32) {
+ if (Ident.second == ELF::ELFDATA2LSB)
+ R.reset(new ELFObjectFile<ELFType<support::little, false>>(Obj, EC));
+ else if (Ident.second == ELF::ELFDATA2MSB)
+ R.reset(new ELFObjectFile<ELFType<support::big, false>>(Obj, EC));
return object_error::parse_failed;
- else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
- if (MaxAlignment >= 8)
- R.reset(new ELFObjectFile<ELFType<support::big, 8, true>>(Obj, EC));
- else
- if (MaxAlignment >= 2)
- R.reset(new ELFObjectFile<ELFType<support::big, 2, true>>(Obj, EC));
- else
- return object_error::parse_failed;
- else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) {
- if (MaxAlignment >= 8)
- R.reset(new ELFObjectFile<ELFType<support::little, 8, true>>(Obj, EC));
- else
- if (MaxAlignment >= 2)
- R.reset(new ELFObjectFile<ELFType<support::little, 2, true>>(Obj, EC));
+ } else if (Ident.first == ELF::ELFCLASS64) {
+ if (Ident.second == ELF::ELFDATA2LSB)
+ R.reset(new ELFObjectFile<ELFType<support::little, true>>(Obj, EC));
+ else if (Ident.second == ELF::ELFDATA2MSB)
+ R.reset(new ELFObjectFile<ELFType<support::big, true>>(Obj, EC));
return object_error::parse_failed;
+ } else {
+ return object_error::parse_failed;
- else
- llvm_unreachable("Buffer is not an ELF object file!");
if (EC)
return EC;
diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp
index d2daab7..644a178 100644
--- a/lib/Object/Error.cpp
+++ b/lib/Object/Error.cpp
@@ -33,7 +33,6 @@ const char *_object_error_category::name() const LLVM_NOEXCEPT {
std::string _object_error_category::message(int EV) const {
object_error E = static_cast<object_error>(EV);
switch (E) {
- case object_error::success: return "Success";
case object_error::arch_not_found:
return "No object file for requested architecture";
case object_error::invalid_file_type:
@@ -44,6 +43,12 @@ std::string _object_error_category::message(int EV) const {
return "The end of the file was unexpectedly encountered";
case object_error::bitcode_section_not_found:
return "Bitcode section not found in object file";
+ case object_error::macho_small_load_command:
+ return "Mach-O load command with size < 8 bytes";
+ case object_error::macho_load_segment_too_many_sections:
+ return "Mach-O segment load command contains too many sections";
+ case object_error::macho_load_segment_too_small:
+ return "Mach-O segment load command size is too small";
llvm_unreachable("An enumerator of object_error does not have a message "
diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp
index c12c5d4..e89cb8e 100644
--- a/lib/Object/IRObjectFile.cpp
+++ b/lib/Object/IRObjectFile.cpp
@@ -195,7 +195,7 @@ std::error_code IRObjectFile::printSymbolName(raw_ostream &OS,
unsigned Index = getAsmSymIndex(Symb);
assert(Index <= AsmSymbols.size());
OS << AsmSymbols[Index].first;
- return object_error::success;;
+ return std::error_code();
if (Mang)
@@ -203,7 +203,7 @@ std::error_code IRObjectFile::printSymbolName(raw_ostream &OS,
OS << GV->getName();
- return object_error::success;
+ return std::error_code();
uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const {
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 79f8100..d02ca48a 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -38,6 +38,7 @@ namespace {
+// FIXME: Replace all uses of this function with getStructOrErr.
template <typename T>
static T getStruct(const MachOObjectFile *O, const char *P) {
// Don't read before the beginning or past the end of the file
@@ -51,39 +52,19 @@ static T getStruct(const MachOObjectFile *O, const char *P) {
return Cmd;
-template <typename SegmentCmd>
-static uint32_t getSegmentLoadCommandNumSections(const SegmentCmd &S,
- uint32_t Cmdsize) {
- const unsigned SectionSize = sizeof(SegmentCmd);
- if (S.nsects > std::numeric_limits<uint32_t>::max() / SectionSize ||
- S.nsects * SectionSize > Cmdsize - sizeof(S))
- report_fatal_error(
- "Number of sections too large for size of load command.");
- return S.nsects;
-static uint32_t
-getSegmentLoadCommandNumSections(const MachOObjectFile *O,
- const MachOObjectFile::LoadCommandInfo &L) {
- if (O->is64Bit())
- return getSegmentLoadCommandNumSections(O->getSegment64LoadCommand(L),
- L.C.cmdsize);
- return getSegmentLoadCommandNumSections(O->getSegmentLoadCommand(L),
- L.C.cmdsize);
+template <typename T>
+static ErrorOr<T> getStructOrErr(const MachOObjectFile *O, const char *P) {
+ // Don't read before the beginning or past the end of the file
+ if (P < O->getData().begin() || P + sizeof(T) > O->getData().end())
+ return object_error::parse_failed;
-static bool isPageZeroSegment(const MachOObjectFile *O,
- const MachOObjectFile::LoadCommandInfo &L) {
- if (O->is64Bit()) {
- MachO::segment_command_64 S = O->getSegment64LoadCommand(L);
- return StringRef("__PAGEZERO").equals(S.segname);
- }
- MachO::segment_command S = O->getSegmentLoadCommand(L);
- return StringRef("__PAGEZERO").equals(S.segname);
+ T Cmd;
+ memcpy(&Cmd, P, sizeof(T));
+ if (O->isLittleEndian() != sys::IsLittleEndianHost)
+ MachO::swapStruct(Cmd);
+ return Cmd;
static const char *
getSectionPtr(const MachOObjectFile *O, MachOObjectFile::LoadCommandInfo L,
unsigned Sec) {
@@ -128,70 +109,6 @@ static unsigned getCPUType(const MachOObjectFile *O) {
return O->getHeader().cputype;
-static void printRelocationTargetName(const MachOObjectFile *O,
- const MachO::any_relocation_info &RE,
- raw_string_ostream &fmt) {
- bool IsScattered = O->isRelocationScattered(RE);
- // Target of a scattered relocation is an address. In the interest of
- // generating pretty output, scan through the symbol table looking for a
- // symbol that aligns with that address. If we find one, print it.
- // Otherwise, we just print the hex address of the target.
- if (IsScattered) {
- uint32_t Val = O->getPlainRelocationSymbolNum(RE);
- for (const SymbolRef &Symbol : O->symbols()) {
- std::error_code ec;
- uint64_t Addr;
- StringRef Name;
- if ((ec = Symbol.getAddress(Addr)))
- report_fatal_error(ec.message());
- if (Addr != Val)
- continue;
- if ((ec = Symbol.getName(Name)))
- report_fatal_error(ec.message());
- fmt << Name;
- return;
- }
- // If we couldn't find a symbol that this relocation refers to, try
- // to find a section beginning instead.
- for (const SectionRef &Section : O->sections()) {
- std::error_code ec;
- StringRef Name;
- uint64_t Addr = Section.getAddress();
- if (Addr != Val)
- continue;
- if ((ec = Section.getName(Name)))
- report_fatal_error(ec.message());
- fmt << Name;
- return;
- }
- fmt << format("0x%x", Val);
- return;
- }
- StringRef S;
- bool isExtern = O->getPlainRelocationExternal(RE);
- uint64_t Val = O->getPlainRelocationSymbolNum(RE);
- if (isExtern) {
- symbol_iterator SI = O->symbol_begin();
- advance(SI, Val);
- SI->getName(S);
- } else {
- section_iterator SI = O->section_begin();
- // Adjust for the fact that sections are 1-indexed.
- advance(SI, Val - 1);
- SI->getName(S);
- }
- fmt << S;
static uint32_t
getPlainRelocationAddress(const MachO::any_relocation_info &RE) {
return RE.r_word0;
@@ -244,6 +161,69 @@ static uint32_t getSectionFlags(const MachOObjectFile *O,
return Sect.flags;
+static ErrorOr<MachOObjectFile::LoadCommandInfo>
+getLoadCommandInfo(const MachOObjectFile *Obj, const char *Ptr) {
+ auto CmdOrErr = getStructOrErr<MachO::load_command>(Obj, Ptr);
+ if (!CmdOrErr)
+ return CmdOrErr.getError();
+ if (CmdOrErr->cmdsize < 8)
+ return object_error::macho_small_load_command;
+ MachOObjectFile::LoadCommandInfo Load;
+ Load.Ptr = Ptr;
+ Load.C = CmdOrErr.get();
+ return Load;
+static ErrorOr<MachOObjectFile::LoadCommandInfo>
+getFirstLoadCommandInfo(const MachOObjectFile *Obj) {
+ unsigned HeaderSize = Obj->is64Bit() ? sizeof(MachO::mach_header_64)
+ : sizeof(MachO::mach_header);
+ return getLoadCommandInfo(Obj, getPtr(Obj, HeaderSize));
+static ErrorOr<MachOObjectFile::LoadCommandInfo>
+getNextLoadCommandInfo(const MachOObjectFile *Obj,
+ const MachOObjectFile::LoadCommandInfo &L) {
+ return getLoadCommandInfo(Obj, L.Ptr + L.C.cmdsize);
+template <typename T>
+static void parseHeader(const MachOObjectFile *Obj, T &Header,
+ std::error_code &EC) {
+ auto HeaderOrErr = getStructOrErr<T>(Obj, getPtr(Obj, 0));
+ if (HeaderOrErr)
+ Header = HeaderOrErr.get();
+ else
+ EC = HeaderOrErr.getError();
+// Parses LC_SEGMENT or LC_SEGMENT_64 load command, adds addresses of all
+// sections to \param Sections, and optionally sets
+// \param IsPageZeroSegment to true.
+template <typename SegmentCmd>
+static std::error_code parseSegmentLoadCommand(
+ const MachOObjectFile *Obj, const MachOObjectFile::LoadCommandInfo &Load,
+ SmallVectorImpl<const char *> &Sections, bool &IsPageZeroSegment) {
+ const unsigned SegmentLoadSize = sizeof(SegmentCmd);
+ if (Load.C.cmdsize < SegmentLoadSize)
+ return object_error::macho_load_segment_too_small;
+ auto SegOrErr = getStructOrErr<SegmentCmd>(Obj, Load.Ptr);
+ if (!SegOrErr)
+ return SegOrErr.getError();
+ SegmentCmd S = SegOrErr.get();
+ const unsigned SectionSize =
+ Obj->is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section);
+ if (S.nsects > std::numeric_limits<uint32_t>::max() / SectionSize ||
+ S.nsects * SectionSize > Load.C.cmdsize - SegmentLoadSize)
+ return object_error::macho_load_segment_too_many_sections;
+ for (unsigned J = 0; J < S.nsects; ++J) {
+ const char *Sec = getSectionPtr(Obj, Load, J);
+ Sections.push_back(Sec);
+ }
+ IsPageZeroSegment |= StringRef("__PAGEZERO").equals(S.segname);
+ return std::error_code();
MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
bool Is64bits, std::error_code &EC)
: ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
@@ -251,15 +231,25 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
DataInCodeLoadCmd(nullptr), LinkOptHintsLoadCmd(nullptr),
DyldInfoLoadCmd(nullptr), UuidLoadCmd(nullptr),
HasPageZeroSegment(false) {
- uint32_t LoadCommandCount = this->getHeader().ncmds;
- if (LoadCommandCount == 0)
+ if (is64Bit())
+ parseHeader(this, Header64, EC);
+ else
+ parseHeader(this, Header, EC);
+ if (EC)
- MachO::LoadCommandType SegmentLoadType = is64Bit() ?
+ uint32_t LoadCommandCount = getHeader().ncmds;
+ if (LoadCommandCount == 0)
+ return;
- MachOObjectFile::LoadCommandInfo Load = getFirstLoadCommandInfo();
- for (unsigned I = 0; ; ++I) {
+ auto LoadOrErr = getFirstLoadCommandInfo(this);
+ if (!LoadOrErr) {
+ EC = LoadOrErr.getError();
+ return;
+ }
+ LoadCommandInfo Load = LoadOrErr.get();
+ for (unsigned I = 0; I < LoadCommandCount; ++I) {
+ LoadCommands.push_back(Load);
if (Load.C.cmd == MachO::LC_SYMTAB) {
// Multiple symbol tables
if (SymtabLoadCmd) {
@@ -303,20 +293,14 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
UuidLoadCmd = Load.Ptr;
- } else if (Load.C.cmd == SegmentLoadType) {
- const unsigned SegmentLoadSize = this->is64Bit()
- ? sizeof(MachO::segment_command_64)
- : sizeof(MachO::segment_command);
- if (Load.C.cmdsize < SegmentLoadSize)
- report_fatal_error("Segment load command size is too small.");
- uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
- for (unsigned J = 0; J < NumSections; ++J) {
- const char *Sec = getSectionPtr(this, Load, J);
- Sections.push_back(Sec);
- }
- if (isPageZeroSegment(this, Load))
- HasPageZeroSegment = true;
+ } else if (Load.C.cmd == MachO::LC_SEGMENT_64) {
+ if ((EC = parseSegmentLoadCommand<MachO::segment_command_64>(
+ this, Load, Sections, HasPageZeroSegment)))
+ return;
+ } else if (Load.C.cmd == MachO::LC_SEGMENT) {
+ if ((EC = parseSegmentLoadCommand<MachO::segment_command>(
+ this, Load, Sections, HasPageZeroSegment)))
+ return;
} else if (Load.C.cmd == MachO::LC_LOAD_DYLIB ||
Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB ||
Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB ||
@@ -324,12 +308,16 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) {
- if (I == LoadCommandCount - 1)
- break;
- else
- Load = getNextLoadCommandInfo(Load);
+ if (I < LoadCommandCount - 1) {
+ auto LoadOrErr = getNextLoadCommandInfo(this, Load);
+ if (!LoadOrErr) {
+ EC = LoadOrErr.getError();
+ return;
+ }
+ Load = LoadOrErr.get();
+ }
+ assert(LoadCommands.size() == LoadCommandCount);
void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
@@ -348,7 +336,7 @@ std::error_code MachOObjectFile::getSymbolName(DataRefImpl Symb,
"Symbol name entry points before beginning or past end of file.");
Res = StringRef(Start);
- return object_error::success;
+ return std::error_code();
unsigned MachOObjectFile::getSectionType(SectionRef Sec) const {
@@ -378,7 +366,7 @@ std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb,
return object_error::parse_failed;
const char *Start = &StringTable.data()[NValue];
Res = StringRef(Start);
- return object_error::success;
+ return std::error_code();
std::error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb,
@@ -398,31 +386,25 @@ std::error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb,
Res = Entry.n_value;
- return object_error::success;
+ return std::error_code();
-std::error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI,
- uint32_t &Result) const {
+uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const {
uint32_t flags = getSymbolFlags(DRI);
if (flags & SymbolRef::SF_Common) {
MachO::nlist_base Entry = getSymbolTableEntryBase(this, DRI);
- Result = 1 << MachO::GET_COMM_ALIGN(Entry.n_desc);
- } else {
- Result = 0;
+ return 1 << MachO::GET_COMM_ALIGN(Entry.n_desc);
- return object_error::success;
+ return 0;
-std::error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
- uint64_t &Result) const {
+uint64_t MachOObjectFile::getSymbolSize(DataRefImpl DRI) const {
uint64_t Value;
getSymbolAddress(DRI, Value);
uint32_t flags = getSymbolFlags(DRI);
if (flags & SymbolRef::SF_Common)
- Result = Value;
- else
- Result = UnknownAddressOrSize;
- return object_error::success;
+ return Value;
+ return UnknownAddressOrSize;
std::error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
@@ -435,7 +417,7 @@ std::error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
// If this is a STAB debugging symbol, we can do nothing more.
if (n_type & MachO::N_STAB) {
Res = SymbolRef::ST_Debug;
- return object_error::success;
+ return std::error_code();
switch (n_type & MachO::N_TYPE) {
@@ -446,7 +428,7 @@ std::error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
Res = SymbolRef::ST_Function;
- return object_error::success;
+ return std::error_code();
uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
@@ -506,7 +488,7 @@ std::error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
Res = section_iterator(SectionRef(DRI, this));
- return object_error::success;
+ return std::error_code();
void MachOObjectFile::moveSectionNext(DataRefImpl &Sec) const {
@@ -517,7 +499,7 @@ std::error_code MachOObjectFile::getSectionName(DataRefImpl Sec,
StringRef &Result) const {
ArrayRef<char> Raw = getSectionRawName(Sec);
Result = parseSegmentOrSectionName(Raw.data());
- return object_error::success;
+ return std::error_code();
uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const {
@@ -548,7 +530,7 @@ std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec,
Res = this->getData().substr(Offset, Size);
- return object_error::success;
+ return std::error_code();
uint64_t MachOObjectFile::getSectionAlignment(DataRefImpl Sec) const {
@@ -643,7 +625,7 @@ std::error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
Sec.d.a = Rel.d.a;
uint64_t SecAddress = getSectionAddress(Sec);
Res = SecAddress + Offset;
- return object_error::success;
+ return std::error_code();
std::error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
@@ -652,7 +634,7 @@ std::error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
"Only implemented for MH_OBJECT");
MachO::any_relocation_info RE = getRelocation(Rel);
Res = getAnyRelocationAddress(RE);
- return object_error::success;
+ return std::error_code();
@@ -685,7 +667,7 @@ std::error_code MachOObjectFile::getRelocationType(DataRefImpl Rel,
uint64_t &Res) const {
MachO::any_relocation_info RE = getRelocation(Rel);
Res = getAnyRelocationType(RE);
- return object_error::success;
+ return std::error_code();
@@ -797,183 +779,7 @@ MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
Result.append(res.begin(), res.end());
- return object_error::success;
-MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const {
- MachO::any_relocation_info RE = getRelocation(Rel);
- unsigned Arch = this->getArch();
- std::string fmtbuf;
- raw_string_ostream fmt(fmtbuf);
- unsigned Type = this->getAnyRelocationType(RE);
- bool IsPCRel = this->getAnyRelocationPCRel(RE);
- // Determine any addends that should be displayed with the relocation.
- // These require decoding the relocation type, which is triple-specific.
- // X86_64 has entirely custom relocation types.
- if (Arch == Triple::x86_64) {
- bool isPCRel = getAnyRelocationPCRel(RE);
- switch (Type) {
- case MachO::X86_64_RELOC_GOT_LOAD:
- case MachO::X86_64_RELOC_GOT: {
- printRelocationTargetName(this, RE, fmt);
- fmt << "@GOT";
- if (isPCRel) fmt << "PCREL";
- break;
- }
- case MachO::X86_64_RELOC_SUBTRACTOR: {
- DataRefImpl RelNext = Rel;
- moveRelocationNext(RelNext);
- MachO::any_relocation_info RENext = getRelocation(RelNext);
- // X86_64_RELOC_SUBTRACTOR must be followed by a relocation of type
- // NOTE: Scattered relocations don't exist on x86_64.
- unsigned RType = getAnyRelocationType(RENext);
- if (RType != MachO::X86_64_RELOC_UNSIGNED)
- report_fatal_error("Expected X86_64_RELOC_UNSIGNED after "
- // The X86_64_RELOC_UNSIGNED contains the minuend symbol;
- // X86_64_RELOC_SUBTRACTOR contains the subtrahend.
- printRelocationTargetName(this, RENext, fmt);
- fmt << "-";
- printRelocationTargetName(this, RE, fmt);
- break;
- }
- case MachO::X86_64_RELOC_TLV:
- printRelocationTargetName(this, RE, fmt);
- fmt << "@TLV";
- if (isPCRel) fmt << "P";
- break;
- case MachO::X86_64_RELOC_SIGNED_1:
- printRelocationTargetName(this, RE, fmt);
- fmt << "-1";
- break;
- case MachO::X86_64_RELOC_SIGNED_2:
- printRelocationTargetName(this, RE, fmt);
- fmt << "-2";
- break;
- case MachO::X86_64_RELOC_SIGNED_4:
- printRelocationTargetName(this, RE, fmt);
- fmt << "-4";
- break;
- default:
- printRelocationTargetName(this, RE, fmt);
- break;
- }
- // X86 and ARM share some relocation types in common.
- } else if (Arch == Triple::x86 || Arch == Triple::arm ||
- Arch == Triple::ppc) {
- // Generic relocation types...
- switch (Type) {
- case MachO::GENERIC_RELOC_PAIR: // prints no info
- return object_error::success;
- DataRefImpl RelNext = Rel;
- moveRelocationNext(RelNext);
- MachO::any_relocation_info RENext = getRelocation(RelNext);
- // X86 sect diff's must be followed by a relocation of type
- unsigned RType = getAnyRelocationType(RENext);
- if (RType != MachO::GENERIC_RELOC_PAIR)
- report_fatal_error("Expected GENERIC_RELOC_PAIR after "
- printRelocationTargetName(this, RE, fmt);
- fmt << "-";
- printRelocationTargetName(this, RENext, fmt);
- break;
- }
- }
- if (Arch == Triple::x86 || Arch == Triple::ppc) {
- switch (Type) {
- DataRefImpl RelNext = Rel;
- moveRelocationNext(RelNext);
- MachO::any_relocation_info RENext = getRelocation(RelNext);
- // X86 sect diff's must be followed by a relocation of type
- unsigned RType = getAnyRelocationType(RENext);
- if (RType != MachO::GENERIC_RELOC_PAIR)
- report_fatal_error("Expected GENERIC_RELOC_PAIR after "
- printRelocationTargetName(this, RE, fmt);
- fmt << "-";
- printRelocationTargetName(this, RENext, fmt);
- break;
- }
- case MachO::GENERIC_RELOC_TLV: {
- printRelocationTargetName(this, RE, fmt);
- fmt << "@TLV";
- if (IsPCRel) fmt << "P";
- break;
- }
- default:
- printRelocationTargetName(this, RE, fmt);
- }
- } else { // ARM-specific relocations
- switch (Type) {
- case MachO::ARM_RELOC_HALF:
- // Half relocations steal a bit from the length field to encode
- // whether this is an upper16 or a lower16 relocation.
- bool isUpper = getAnyRelocationLength(RE) >> 1;
- if (isUpper)
- fmt << ":upper16:(";
- else
- fmt << ":lower16:(";
- printRelocationTargetName(this, RE, fmt);
- DataRefImpl RelNext = Rel;
- moveRelocationNext(RelNext);
- MachO::any_relocation_info RENext = getRelocation(RelNext);
- // ARM half relocs must be followed by a relocation of type
- unsigned RType = getAnyRelocationType(RENext);
- if (RType != MachO::ARM_RELOC_PAIR)
- report_fatal_error("Expected ARM_RELOC_PAIR after "
- // NOTE: The half of the target virtual address is stashed in the
- // address field of the secondary relocation, but we can't reverse
- // engineer the constant offset from it without decoding the movw/movt
- // instruction to find the other half in its immediate field.
- // ARM_RELOC_HALF_SECTDIFF encodes the second section in the
- // symbol/section pointer of the follow-on relocation.
- if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) {
- fmt << "-";
- printRelocationTargetName(this, RENext, fmt);
- }
- fmt << ")";
- break;
- }
- default: {
- printRelocationTargetName(this, RE, fmt);
- }
- }
- }
- } else
- printRelocationTargetName(this, RE, fmt);
- fmt.flush();
- Result.append(fmtbuf.begin(), fmtbuf.end());
- return object_error::success;
+ return std::error_code();
std::error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
@@ -1001,7 +807,12 @@ std::error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
- return object_error::success;
+ return std::error_code();
+uint8_t MachOObjectFile::getRelocationLength(DataRefImpl Rel) const {
+ MachO::any_relocation_info RE = getRelocation(Rel);
+ return getAnyRelocationLength(RE);
@@ -1179,7 +990,7 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index,
Res = LibrariesShortNames[Index];
- return object_error::success;
+ return std::error_code();
basic_symbol_iterator MachOObjectFile::symbol_begin_impl() const {
@@ -1437,21 +1248,8 @@ unsigned MachOObjectFile::getArch() const {
Triple MachOObjectFile::getArch(const char **McpuDefault,
Triple *ThumbTriple) const {
- Triple T;
- if (is64Bit()) {
- MachO::mach_header_64 H_64;
- H_64 = getHeader64();
- T = MachOObjectFile::getArch(H_64.cputype, H_64.cpusubtype, McpuDefault);
- *ThumbTriple = MachOObjectFile::getThumbArch(H_64.cputype, H_64.cpusubtype,
- McpuDefault);
- } else {
- MachO::mach_header H;
- H = getHeader();
- T = MachOObjectFile::getArch(H.cputype, H.cpusubtype, McpuDefault);
- *ThumbTriple = MachOObjectFile::getThumbArch(H.cputype, H.cpusubtype,
- McpuDefault);
- }
- return T;
+ *ThumbTriple = getThumbArch(Header.cputype, Header.cpusubtype, McpuDefault);
+ return getArch(Header.cputype, Header.cpusubtype, McpuDefault);
relocation_iterator MachOObjectFile::section_rel_begin(unsigned Index) const {
@@ -2104,6 +1902,22 @@ iterator_range<bind_iterator> MachOObjectFile::weakBindTable() const {
+MachOObjectFile::begin_load_commands() const {
+ return LoadCommands.begin();
+MachOObjectFile::end_load_commands() const {
+ return LoadCommands.end();
+MachOObjectFile::load_commands() const {
+ return iterator_range<load_command_iterator>(begin_load_commands(),
+ end_load_commands());
MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec);
@@ -2203,29 +2017,6 @@ MachOObjectFile::getAnyRelocationSection(
return SectionRef(DRI, this);
-MachOObjectFile::getFirstLoadCommandInfo() const {
- MachOObjectFile::LoadCommandInfo Load;
- unsigned HeaderSize = is64Bit() ? sizeof(MachO::mach_header_64) :
- sizeof(MachO::mach_header);
- Load.Ptr = getPtr(this, HeaderSize);
- Load.C = getStruct<MachO::load_command>(this, Load.Ptr);
- if (Load.C.cmdsize < 8)
- report_fatal_error("Load command with size < 8 bytes.");
- return Load;
-MachOObjectFile::getNextLoadCommandInfo(const LoadCommandInfo &L) const {
- MachOObjectFile::LoadCommandInfo Next;
- Next.Ptr = L.Ptr + L.C.cmdsize;
- Next.C = getStruct<MachO::load_command>(this, Next.Ptr);
- if (Next.C.cmdsize < 8)
- report_fatal_error("Load command with size < 8 bytes.");
- return Next;
MachO::section MachOObjectFile::getSection(DataRefImpl DRI) const {
assert(DRI.d.a < Sections.size() && "Should have detected this earlier");
return getStruct<MachO::section>(this, Sections[DRI.d.a]);
@@ -2390,12 +2181,13 @@ MachOObjectFile::getDice(DataRefImpl Rel) const {
return getStruct<MachO::data_in_code_entry>(this, P);
-MachO::mach_header MachOObjectFile::getHeader() const {
- return getStruct<MachO::mach_header>(this, getPtr(this, 0));
+const MachO::mach_header &MachOObjectFile::getHeader() const {
+ return Header;
-MachO::mach_header_64 MachOObjectFile::getHeader64() const {
- return getStruct<MachO::mach_header_64>(this, getPtr(this, 0));
+const MachO::mach_header_64 &MachOObjectFile::getHeader64() const {
+ assert(is64Bit());
+ return Header64;
uint32_t MachOObjectFile::getIndirectSymbolTableEntry(
diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp
index a01c838..2705e7d 100644
--- a/lib/Object/MachOUniversal.cpp
+++ b/lib/Object/MachOUniversal.cpp
@@ -120,7 +120,7 @@ MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source,
ec = object_error::parse_failed;
- ec = object_error::success;
+ ec = std::error_code();
static bool getCTMForArch(Triple::ArchType Arch, MachO::CPUType &CTM) {
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index 84a5df0..85f2436 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -187,10 +187,7 @@ uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) {
- uint64_t ret;
- if (std::error_code ec = (*unwrap(SI))->getSize(ret))
- report_fatal_error(ec.message());
- return ret;
+ return (*unwrap(SI))->getSize();
// RelocationRef accessors
@@ -233,12 +230,6 @@ const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) {
// NOTE: Caller takes ownership of returned string.
const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI) {
- SmallVector<char, 0> ret;
- if (std::error_code ec = (*unwrap(RI))->getValueString(ret))
- report_fatal_error(ec.message());
- char *str = static_cast<char*>(malloc(ret.size()));
- std::copy(ret.begin(), ret.end(), str);
- return str;
+ return strdup("");
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index 01b7654..f6667d9 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -34,14 +34,10 @@ std::error_code ObjectFile::printSymbolName(raw_ostream &OS,
if (std::error_code EC = getSymbolName(Symb, Name))
return EC;
OS << Name;
- return object_error::success;
+ return std::error_code();
-std::error_code ObjectFile::getSymbolAlignment(DataRefImpl DRI,
- uint32_t &Result) const {
- Result = 0;
- return object_error::success;
+uint32_t ObjectFile::getSymbolAlignment(DataRefImpl DRI) const { return 0; }
section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const {
return section_iterator(SectionRef(Sec, this));
diff --git a/lib/ProfileData/CoverageMappingReader.cpp b/lib/ProfileData/CoverageMappingReader.cpp
index cf6cd58..ec531c3 100644
--- a/lib/ProfileData/CoverageMappingReader.cpp
+++ b/lib/ProfileData/CoverageMappingReader.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -358,8 +359,12 @@ std::error_code readCoverageMappingData(
const char *CovBuf = Buf;
Buf += CoverageSize;
const char *CovEnd = Buf;
if (Buf > End)
return coveragemap_error::malformed;
+ // Each coverage map has an alignment of 8, so we need to adjust alignment
+ // before reading the next map.
+ Buf += alignmentAdjustment(Buf, 8);
while (FunBuf < FunEnd) {
// Read the function information
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index eb99242..47751fc 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -49,9 +49,9 @@ static ManagedStatic<std::vector<std::string>> CurrentDebugType;
bool isCurrentDebugType(const char *DebugType) {
if (CurrentDebugType->empty())
return true;
- // see if DebugType is in list. Note: do not use find() as that forces us to
+ // See if DebugType is in list. Note: do not use find() as that forces us to
// unnecessarily create an std::string instance.
- for (auto d : *CurrentDebugType) {
+ for (auto &d : *CurrentDebugType) {
if (d == DebugType)
return true;
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index d2b551e..9a7aeb5 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -178,3 +178,12 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
LLVMBool LLVMLoadLibraryPermanently(const char* Filename) {
return llvm::sys::DynamicLibrary::LoadLibraryPermanently(Filename);
+void *LLVMSearchForAddressOfSymbol(const char *symbolName) {
+ return llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(symbolName);
+void LLVMAddSymbol(const char *symbolName, void *symbolValue) {
+ return llvm::sys::DynamicLibrary::AddSymbol(symbolName, symbolValue);
diff --git a/lib/Support/SmallVector.cpp b/lib/Support/SmallVector.cpp
index f9c0e78..b931505b 100644
--- a/lib/Support/SmallVector.cpp
+++ b/lib/Support/SmallVector.cpp
@@ -33,6 +33,7 @@ void SmallVectorBase::grow_pod(void *FirstEl, size_t MinSizeInBytes,
// If this wasn't grown from the inline copy, grow the allocated space.
NewElts = realloc(this->BeginX, NewCapacityInBytes);
+ assert(NewElts && "Out of memory");
this->EndX = (char*)NewElts+CurSizeBytes;
this->BeginX = NewElts;
diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp
index a3998d2..757483b 100644
--- a/lib/Support/TargetParser.cpp
+++ b/lib/Support/TargetParser.cpp
@@ -22,72 +22,88 @@ using namespace llvm;
namespace {
-// List of canonical FPU names (use getFPUSynonym)
+// List of canonical FPU names (use getFPUSynonym) and which architectural
+// features they correspond to (use getFPUFeatures).
// FIXME: TableGen this.
struct {
const char * Name;
+ unsigned FPUVersion; ///< Corresponds directly to the FP arch version number.
+ ARM::NeonSupportLevel NeonSupport;
+ ARM::FPURestriction Restriction;
} FPUNames[] = {
- { "invalid", ARM::FK_INVALID },
- { "vfp", ARM::FK_VFP },
- { "vfpv2", ARM::FK_VFPV2 },
- { "vfpv3", ARM::FK_VFPV3 },
- { "vfpv3-d16", ARM::FK_VFPV3_D16 },
- { "vfpv4", ARM::FK_VFPV4 },
- { "vfpv4-d16", ARM::FK_VFPV4_D16 },
- { "fpv5-d16", ARM::FK_FPV5_D16 },
- { "fp-armv8", ARM::FK_FP_ARMV8 },
- { "neon", ARM::FK_NEON },
- { "neon-vfpv4", ARM::FK_NEON_VFPV4 },
- { "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8 },
- { "crypto-neon-fp-armv8", ARM::FK_CRYPTO_NEON_FP_ARMV8 },
- { "softvfp", ARM::FK_SOFTVFP }
+ { "invalid", ARM::FK_INVALID, 0, ARM::NS_None, ARM::FR_None},
+ { "none", ARM::FK_NONE, 0, ARM::NS_None, ARM::FR_None},
+ { "vfp", ARM::FK_VFP, 2, ARM::NS_None, ARM::FR_None},
+ { "vfpv2", ARM::FK_VFPV2, 2, ARM::NS_None, ARM::FR_None},
+ { "vfpv3", ARM::FK_VFPV3, 3, ARM::NS_None, ARM::FR_None},
+ { "vfpv3-d16", ARM::FK_VFPV3_D16, 3, ARM::NS_None, ARM::FR_D16},
+ { "vfpv4", ARM::FK_VFPV4, 4, ARM::NS_None, ARM::FR_None},
+ { "vfpv4-d16", ARM::FK_VFPV4_D16, 4, ARM::NS_None, ARM::FR_D16},
+ { "fpv4-sp-d16", ARM::FK_FPV4_SP_D16, 4, ARM::NS_None, ARM::FR_SP_D16},
+ { "fpv5-d16", ARM::FK_FPV5_D16, 5, ARM::NS_None, ARM::FR_D16},
+ { "fpv5-sp-d16", ARM::FK_FPV5_SP_D16, 5, ARM::NS_None, ARM::FR_SP_D16},
+ { "fp-armv8", ARM::FK_FP_ARMV8, 5, ARM::NS_None, ARM::FR_None},
+ { "neon", ARM::FK_NEON, 3, ARM::NS_Neon, ARM::FR_None},
+ { "neon-vfpv4", ARM::FK_NEON_VFPV4, 4, ARM::NS_Neon, ARM::FR_None},
+ { "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8, 5, ARM::NS_Neon, ARM::FR_None},
+ { "crypto-neon-fp-armv8",
+ { "softvfp", ARM::FK_SOFTVFP, 0, ARM::NS_None, ARM::FR_None},
-// List of canonical arch names (use getArchSynonym)
+// List of canonical arch names (use getArchSynonym).
+// This table also provides the build attribute fields for CPU arch
+// and Arch ID, according to the Addenda to the ARM ABI, chapters
+// 2.4 and 2.3.5.2 respectively.
+// FIXME: SubArch values were simplified to fit into the expectations
+// of the triples and are not conforming with their official names.
+// Check to see if the expectation should be changed.
// FIXME: TableGen this.
struct {
const char *Name;
ARM::ArchKind ID;
- const char *DefaultCPU;
- ARMBuildAttrs::CPUArch DefaultArch;
+ const char *CPUAttr; // CPU class in build attributes.
+ const char *SubArch; // Sub-Arch name.
+ ARMBuildAttrs::CPUArch ArchAttr; // Arch ID in build attributes.
} ARCHNames[] = {
- { "invalid", ARM::AK_INVALID, nullptr, ARMBuildAttrs::CPUArch::Pre_v4 },
- { "armv2", ARM::AK_ARMV2, "2", ARMBuildAttrs::CPUArch::v4 },
- { "armv2a", ARM::AK_ARMV2A, "2A", ARMBuildAttrs::CPUArch::v4 },
- { "armv3", ARM::AK_ARMV3, "3", ARMBuildAttrs::CPUArch::v4 },
- { "armv3m", ARM::AK_ARMV3M, "3M", ARMBuildAttrs::CPUArch::v4 },
- { "armv4", ARM::AK_ARMV4, "4", ARMBuildAttrs::CPUArch::v4 },
- { "armv4t", ARM::AK_ARMV4T, "4T", ARMBuildAttrs::CPUArch::v4T },
- { "armv5", ARM::AK_ARMV5, "5", ARMBuildAttrs::CPUArch::v5T },
- { "armv5t", ARM::AK_ARMV5T, "5T", ARMBuildAttrs::CPUArch::v5T },
- { "armv5te", ARM::AK_ARMV5TE, "5TE", ARMBuildAttrs::CPUArch::v5TE },
- { "armv6", ARM::AK_ARMV6, "6", ARMBuildAttrs::CPUArch::v6 },
- { "armv6j", ARM::AK_ARMV6J, "6J", ARMBuildAttrs::CPUArch::v6 },
- { "armv6k", ARM::AK_ARMV6K, "6K", ARMBuildAttrs::CPUArch::v6K },
- { "armv6t2", ARM::AK_ARMV6T2, "6T2", ARMBuildAttrs::CPUArch::v6T2 },
- { "armv6z", ARM::AK_ARMV6Z, "6Z", ARMBuildAttrs::CPUArch::v6KZ },
- { "armv6zk", ARM::AK_ARMV6ZK, "6ZK", ARMBuildAttrs::CPUArch::v6KZ },
- { "armv6-m", ARM::AK_ARMV6M, "6-M", ARMBuildAttrs::CPUArch::v6_M },
- { "armv7", ARM::AK_ARMV7, "7", ARMBuildAttrs::CPUArch::v7 },
- { "armv7-a", ARM::AK_ARMV7A, "7-A", ARMBuildAttrs::CPUArch::v7 },
- { "armv7-r", ARM::AK_ARMV7R, "7-R", ARMBuildAttrs::CPUArch::v7 },
- { "armv7-m", ARM::AK_ARMV7M, "7-M", ARMBuildAttrs::CPUArch::v7 },
- { "armv8-a", ARM::AK_ARMV8A, "8-A", ARMBuildAttrs::CPUArch::v8 },
- { "armv8.1-a", ARM::AK_ARMV8_1A, "8.1-A", ARMBuildAttrs::CPUArch::v8 },
+ { "invalid", ARM::AK_INVALID, nullptr, nullptr, ARMBuildAttrs::CPUArch::Pre_v4 },
+ { "armv2", ARM::AK_ARMV2, "2", "v2", ARMBuildAttrs::CPUArch::Pre_v4 },
+ { "armv2a", ARM::AK_ARMV2A, "2A", "v2a", ARMBuildAttrs::CPUArch::Pre_v4 },
+ { "armv3", ARM::AK_ARMV3, "3", "v3", ARMBuildAttrs::CPUArch::Pre_v4 },
+ { "armv3m", ARM::AK_ARMV3M, "3M", "v3m", ARMBuildAttrs::CPUArch::Pre_v4 },
+ { "armv4", ARM::AK_ARMV4, "4", "v4", ARMBuildAttrs::CPUArch::v4 },
+ { "armv4t", ARM::AK_ARMV4T, "4T", "v4t", ARMBuildAttrs::CPUArch::v4T },
+ { "armv5t", ARM::AK_ARMV5T, "5T", "v5", ARMBuildAttrs::CPUArch::v5T },
+ { "armv5te", ARM::AK_ARMV5TE, "5TE", "v5e", ARMBuildAttrs::CPUArch::v5TE },
+ { "armv5tej", ARM::AK_ARMV5TEJ, "5TEJ", "v5e", ARMBuildAttrs::CPUArch::v5TEJ },
+ { "armv6", ARM::AK_ARMV6, "6", "v6", ARMBuildAttrs::CPUArch::v6 },
+ { "armv6k", ARM::AK_ARMV6K, "6K", "v6k", ARMBuildAttrs::CPUArch::v6K },
+ { "armv6t2", ARM::AK_ARMV6T2, "6T2", "v6t2", ARMBuildAttrs::CPUArch::v6T2 },
+ { "armv6z", ARM::AK_ARMV6Z, "6Z", "v6z", ARMBuildAttrs::CPUArch::v6KZ },
+ { "armv6zk", ARM::AK_ARMV6ZK, "6ZK", "v6zk", ARMBuildAttrs::CPUArch::v6KZ },
+ { "armv6-m", ARM::AK_ARMV6M, "6-M", "v6m", ARMBuildAttrs::CPUArch::v6_M },
+ { "armv6s-m", ARM::AK_ARMV6SM, "6S-M", "v6sm", ARMBuildAttrs::CPUArch::v6S_M },
+ { "armv7-a", ARM::AK_ARMV7A, "7-A", "v7", ARMBuildAttrs::CPUArch::v7 },
+ { "armv7-r", ARM::AK_ARMV7R, "7-R", "v7r", ARMBuildAttrs::CPUArch::v7 },
+ { "armv7-m", ARM::AK_ARMV7M, "7-M", "v7m", ARMBuildAttrs::CPUArch::v7 },
+ { "armv7e-m", ARM::AK_ARMV7EM, "7E-M", "v7em", ARMBuildAttrs::CPUArch::v7E_M },
+ { "armv8-a", ARM::AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8 },
+ { "armv8.1-a", ARM::AK_ARMV8_1A, "8.1-A", "v8.1a", ARMBuildAttrs::CPUArch::v8 },
// Non-standard Arch names.
- { "iwmmxt", ARM::AK_IWMMXT, "iwmmxt", ARMBuildAttrs::CPUArch::v5TE },
- { "iwmmxt2", ARM::AK_IWMMXT2, "iwmmxt2", ARMBuildAttrs::CPUArch::v5TE },
- { "xscale", ARM::AK_XSCALE, "xscale", ARMBuildAttrs::CPUArch::v5TE },
- { "armv5e", ARM::AK_ARMV5E, "5E", ARMBuildAttrs::CPUArch::v5TE },
- { "armv5tej", ARM::AK_ARMV5TEJ, "5TE", ARMBuildAttrs::CPUArch::v5TE },
- { "armv6sm", ARM::AK_ARMV6SM, "6-M", ARMBuildAttrs::CPUArch::v6_M },
- { "armv6hl", ARM::AK_ARMV6HL, "6-M", ARMBuildAttrs::CPUArch::v6_M },
- { "armv7e-m", ARM::AK_ARMV7EM, "7E-M", ARMBuildAttrs::CPUArch::v7E_M },
- { "armv7l", ARM::AK_ARMV7L, "7-L", ARMBuildAttrs::CPUArch::v7 },
- { "armv7hl", ARM::AK_ARMV7HL, "7H-L", ARMBuildAttrs::CPUArch::v7 },
- { "armv7s", ARM::AK_ARMV7S, "7-S", ARMBuildAttrs::CPUArch::v7 }
+ { "iwmmxt", ARM::AK_IWMMXT, "iwmmxt", "", ARMBuildAttrs::CPUArch::v5TE },
+ { "iwmmxt2", ARM::AK_IWMMXT2, "iwmmxt2", "", ARMBuildAttrs::CPUArch::v5TE },
+ { "xscale", ARM::AK_XSCALE, "xscale", "", ARMBuildAttrs::CPUArch::v5TE },
+ { "armv5", ARM::AK_ARMV5, "5T", "v5", ARMBuildAttrs::CPUArch::v5T },
+ { "armv5e", ARM::AK_ARMV5E, "5TE", "v5e", ARMBuildAttrs::CPUArch::v5TE },
+ { "armv6j", ARM::AK_ARMV6J, "6J", "v6", ARMBuildAttrs::CPUArch::v6 },
+ { "armv6hl", ARM::AK_ARMV6HL, "6-M", "v6hl", ARMBuildAttrs::CPUArch::v6_M },
+ { "armv7", ARM::AK_ARMV7, "7", "v7", ARMBuildAttrs::CPUArch::v7 },
+ { "armv7l", ARM::AK_ARMV7L, "7-L", "v7l", ARMBuildAttrs::CPUArch::v7 },
+ { "armv7hl", ARM::AK_ARMV7HL, "7-L", "v7hl", ARMBuildAttrs::CPUArch::v7 },
+ { "armv7s", ARM::AK_ARMV7S, "7-S", "v7s", ARMBuildAttrs::CPUArch::v7 }
-// List of canonical ARCH names (use getARCHSynonym)
+// List of Arch Extension names.
// FIXME: TableGen this.
struct {
const char *Name;
@@ -99,12 +115,19 @@ struct {
{ "fp", ARM::AEK_FP },
{ "idiv", ARM::AEK_HWDIV },
{ "mp", ARM::AEK_MP },
+ { "simd", ARM::AEK_SIMD },
{ "sec", ARM::AEK_SEC },
- { "virt", ARM::AEK_VIRT }
+ { "virt", ARM::AEK_VIRT },
+ { "os", ARM::AEK_OS },
+ { "iwmmxt", ARM::AEK_IWMMXT },
+ { "iwmmxt2", ARM::AEK_IWMMXT2 },
+ { "maverick", ARM::AEK_MAVERICK },
+ { "xscale", ARM::AEK_XSCALE }
// List of CPU names and their arches.
// The same CPU can have multiple arches and can be default on multiple arches.
// When finding the Arch for a CPU, first-found prevails. Sort them accordingly.
+// When this becomes table-generated, we'd probably need two tables.
// FIXME: TableGen this.
struct {
const char *Name;
@@ -112,9 +135,15 @@ struct {
bool Default;
} CPUNames[] = {
{ "arm2", ARM::AK_ARMV2, true },
+ { "arm3", ARM::AK_ARMV2A, true },
{ "arm6", ARM::AK_ARMV3, true },
{ "arm7m", ARM::AK_ARMV3M, true },
+ { "arm8", ARM::AK_ARMV4, false },
+ { "arm810", ARM::AK_ARMV4, false },
{ "strongarm", ARM::AK_ARMV4, true },
+ { "strongarm110", ARM::AK_ARMV4, false },
+ { "strongarm1100", ARM::AK_ARMV4, false },
+ { "strongarm1110", ARM::AK_ARMV4, false },
{ "arm7tdmi", ARM::AK_ARMV4T, true },
{ "arm7tdmi-s", ARM::AK_ARMV4T, false },
{ "arm710t", ARM::AK_ARMV4T, false },
@@ -127,24 +156,21 @@ struct {
{ "arm9312", ARM::AK_ARMV4T, false },
{ "arm940t", ARM::AK_ARMV4T, false },
{ "ep9312", ARM::AK_ARMV4T, false },
- { "arm10tdmi", ARM::AK_ARMV5, true },
{ "arm10tdmi", ARM::AK_ARMV5T, true },
{ "arm1020t", ARM::AK_ARMV5T, false },
- { "xscale", ARM::AK_XSCALE, true },
- { "xscale", ARM::AK_ARMV5TE, false },
{ "arm9e", ARM::AK_ARMV5TE, false },
- { "arm926ej-s", ARM::AK_ARMV5TE, false },
- { "arm946ej-s", ARM::AK_ARMV5TE, false },
+ { "arm946e-s", ARM::AK_ARMV5TE, false },
{ "arm966e-s", ARM::AK_ARMV5TE, false },
{ "arm968e-s", ARM::AK_ARMV5TE, false },
+ { "arm10e", ARM::AK_ARMV5TE, false },
{ "arm1020e", ARM::AK_ARMV5TE, false },
{ "arm1022e", ARM::AK_ARMV5TE, true },
{ "iwmmxt", ARM::AK_ARMV5TE, false },
- { "iwmmxt", ARM::AK_IWMMXT, true },
+ { "xscale", ARM::AK_ARMV5TE, false },
+ { "arm926ej-s", ARM::AK_ARMV5TEJ, true },
{ "arm1136jf-s", ARM::AK_ARMV6, true },
- { "arm1136j-s", ARM::AK_ARMV6J, true },
- { "arm1136jz-s", ARM::AK_ARMV6J, false },
{ "arm1176j-s", ARM::AK_ARMV6K, false },
+ { "arm1176jz-s", ARM::AK_ARMV6K, false },
{ "mpcore", ARM::AK_ARMV6K, false },
{ "mpcorenovfp", ARM::AK_ARMV6K, false },
{ "arm1176jzf-s", ARM::AK_ARMV6K, true },
@@ -156,7 +182,6 @@ struct {
{ "cortex-m0plus", ARM::AK_ARMV6M, false },
{ "cortex-m1", ARM::AK_ARMV6M, false },
{ "sc000", ARM::AK_ARMV6M, false },
- { "cortex-a8", ARM::AK_ARMV7, true },
{ "cortex-a5", ARM::AK_ARMV7A, false },
{ "cortex-a7", ARM::AK_ARMV7A, false },
{ "cortex-a8", ARM::AK_ARMV7A, true },
@@ -171,18 +196,23 @@ struct {
{ "cortex-r7", ARM::AK_ARMV7R, false },
{ "sc300", ARM::AK_ARMV7M, false },
{ "cortex-m3", ARM::AK_ARMV7M, true },
- { "cortex-m4", ARM::AK_ARMV7M, false },
- { "cortex-m7", ARM::AK_ARMV7M, false },
+ { "cortex-m4", ARM::AK_ARMV7EM, true },
+ { "cortex-m7", ARM::AK_ARMV7EM, false },
{ "cortex-a53", ARM::AK_ARMV8A, true },
{ "cortex-a57", ARM::AK_ARMV8A, false },
{ "cortex-a72", ARM::AK_ARMV8A, false },
{ "cyclone", ARM::AK_ARMV8A, false },
{ "generic", ARM::AK_ARMV8_1A, true },
// Non-standard Arch names.
+ { "iwmmxt", ARM::AK_IWMMXT, true },
+ { "xscale", ARM::AK_XSCALE, true },
+ { "arm10tdmi", ARM::AK_ARMV5, true },
{ "arm1022e", ARM::AK_ARMV5E, true },
- { "arm926ej-s", ARM::AK_ARMV5TEJ, true },
+ { "arm1136j-s", ARM::AK_ARMV6J, true },
+ { "arm1136jz-s", ARM::AK_ARMV6J, false },
{ "cortex-m0", ARM::AK_ARMV6SM, true },
{ "arm1176jzf-s", ARM::AK_ARMV6HL, true },
+ { "cortex-a8", ARM::AK_ARMV7, true },
{ "cortex-a8", ARM::AK_ARMV7L, true },
{ "cortex-a8", ARM::AK_ARMV7HL, true },
{ "cortex-m4", ARM::AK_ARMV7EM, true },
@@ -193,8 +223,6 @@ struct {
} // namespace
-namespace llvm {
// ======================================================= //
// Information by ID
// ======================================================= //
@@ -205,22 +233,117 @@ const char *ARMTargetParser::getFPUName(unsigned FPUKind) {
return FPUNames[FPUKind].Name;
+unsigned ARMTargetParser::getFPUVersion(unsigned FPUKind) {
+ if (FPUKind >= ARM::FK_LAST)
+ return 0;
+ return FPUNames[FPUKind].FPUVersion;
+unsigned ARMTargetParser::getFPUNeonSupportLevel(unsigned FPUKind) {
+ if (FPUKind >= ARM::FK_LAST)
+ return 0;
+ return FPUNames[FPUKind].NeonSupport;
+unsigned ARMTargetParser::getFPURestriction(unsigned FPUKind) {
+ if (FPUKind >= ARM::FK_LAST)
+ return 0;
+ return FPUNames[FPUKind].Restriction;
+bool ARMTargetParser::getFPUFeatures(unsigned FPUKind,
+ std::vector<const char *> &Features) {
+ if (FPUKind >= ARM::FK_LAST || FPUKind == ARM::FK_INVALID)
+ return false;
+ // fp-only-sp and d16 subtarget features are independent of each other, so we
+ // must enable/disable both.
+ switch (FPUNames[FPUKind].Restriction) {
+ case ARM::FR_SP_D16:
+ Features.push_back("+fp-only-sp");
+ Features.push_back("+d16");
+ break;
+ case ARM::FR_D16:
+ Features.push_back("-fp-only-sp");
+ Features.push_back("+d16");
+ break;
+ case ARM::FR_None:
+ Features.push_back("-fp-only-sp");
+ Features.push_back("-d16");
+ break;
+ }
+ // FPU version subtarget features are inclusive of lower-numbered ones, so
+ // enable the one corresponding to this version and disable all that are
+ // higher.
+ switch (FPUNames[FPUKind].FPUVersion) {
+ case 5:
+ Features.push_back("+fp-armv8");
+ break;
+ case 4:
+ Features.push_back("+vfp4");
+ Features.push_back("-fp-armv8");
+ break;
+ case 3:
+ Features.push_back("+vfp3");
+ Features.push_back("-vfp4");
+ Features.push_back("-fp-armv8");
+ break;
+ case 2:
+ Features.push_back("+vfp2");
+ Features.push_back("-vfp3");
+ Features.push_back("-vfp4");
+ Features.push_back("-fp-armv8");
+ break;
+ case 0:
+ Features.push_back("-vfp2");
+ Features.push_back("-vfp3");
+ Features.push_back("-vfp4");
+ Features.push_back("-fp-armv8");
+ break;
+ }
+ // crypto includes neon, so we handle this similarly to FPU version.
+ switch (FPUNames[FPUKind].NeonSupport) {
+ case ARM::NS_Crypto:
+ Features.push_back("+crypto");
+ break;
+ case ARM::NS_Neon:
+ Features.push_back("+neon");
+ Features.push_back("-crypto");
+ break;
+ case ARM::NS_None:
+ Features.push_back("-neon");
+ Features.push_back("-crypto");
+ break;
+ }
+ return true;
const char *ARMTargetParser::getArchName(unsigned ArchKind) {
if (ArchKind >= ARM::AK_LAST)
return nullptr;
return ARCHNames[ArchKind].Name;
-const char *ARMTargetParser::getArchDefaultCPUName(unsigned ArchKind) {
+const char *ARMTargetParser::getCPUAttr(unsigned ArchKind) {
+ if (ArchKind >= ARM::AK_LAST)
+ return nullptr;
+ return ARCHNames[ArchKind].CPUAttr;
+const char *ARMTargetParser::getSubArch(unsigned ArchKind) {
if (ArchKind >= ARM::AK_LAST)
return nullptr;
- return ARCHNames[ArchKind].DefaultCPU;
+ return ARCHNames[ArchKind].SubArch;
-unsigned ARMTargetParser::getArchDefaultCPUArch(unsigned ArchKind) {
+unsigned ARMTargetParser::getArchAttr(unsigned ArchKind) {
if (ArchKind >= ARM::AK_LAST)
return ARMBuildAttrs::CPUArch::Pre_v4;
- return ARCHNames[ArchKind].DefaultArch;
+ return ARCHNames[ArchKind].ArchAttr;
const char *ARMTargetParser::getArchExtName(unsigned ArchExtKind) {
@@ -254,10 +377,9 @@ StringRef ARMTargetParser::getFPUSynonym(StringRef FPU) {
.Case("vfp4", "vfpv4")
.Case("vfp3-d16", "vfpv3-d16")
.Case("vfp4-d16", "vfpv4-d16")
- // FIXME: sp-16 is NOT the same as d16
- .Cases("fp4-sp-d16", "fpv4-sp-d16", "vfpv4-d16")
+ .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16")
.Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16")
- .Cases("fp5-sp-d16", "fpv5-sp-d16", "fpv5-d16")
+ .Case("fp5-sp-d16", "fpv5-sp-d16")
.Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16")
// FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3.
.Case("neon-vfpv3", "neon")
@@ -266,15 +388,14 @@ StringRef ARMTargetParser::getFPUSynonym(StringRef FPU) {
StringRef ARMTargetParser::getArchSynonym(StringRef Arch) {
return StringSwitch<StringRef>(Arch)
- .Cases("armv6m", "v6m", "armv6-m")
- .Cases("armv7a", "v7a", "armv7-a")
- .Cases("armv7r", "v7r", "armv7-r")
- .Cases("armv7m", "v7m", "armv7-m")
- .Cases("armv7em", "v7em", "armv7e-m")
- .Cases("armv8", "v8", "armv8-a")
- .Cases("armv8a", "v8a", "armv8-a")
- .Cases("armv8.1a", "v8.1a", "armv8.1-a")
- .Cases("aarch64", "arm64", "armv8-a")
+ .Case("v6sm", "v6s-m")
+ .Case("v6m", "v6-m")
+ .Case("v7a", "v7-a")
+ .Case("v7r", "v7-r")
+ .Case("v7m", "v7-m")
+ .Case("v7em", "v7e-m")
+ .Cases("v8", "v8a", "aarch64", "arm64", "v8-a")
+ .Case("v8.1a", "v8.1-a")
@@ -342,6 +463,7 @@ unsigned ARMTargetParser::parseFPU(StringRef FPU) {
// Allows partial match, ex. "v7a" matches "armv7a".
unsigned ARMTargetParser::parseArch(StringRef Arch) {
+ Arch = getCanonicalArchName(Arch);
StringRef Syn = getArchSynonym(Arch);
for (const auto A : ARCHNames) {
if (StringRef(A.Name).endswith(Syn))
@@ -463,5 +585,3 @@ unsigned ARMTargetParser::parseArchVersion(StringRef Arch) {
return 0;
-} // namespace llvm
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index a63426f..ad99386 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/Host.h"
#include <cstring>
using namespace llvm;
@@ -24,7 +25,8 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case aarch64_be: return "aarch64_be";
case arm: return "arm";
case armeb: return "armeb";
- case bpf: return "bpf";
+ case bpfel: return "bpfel";
+ case bpfeb: return "bpfeb";
case hexagon: return "hexagon";
case mips: return "mips";
case mipsel: return "mipsel";
@@ -89,7 +91,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case amdgcn:
case r600: return "amdgpu";
- case bpf: return "bpf";
+ case bpfel:
+ case bpfeb: return "bpf";
case sparcv9:
case sparcel:
@@ -192,14 +195,30 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
llvm_unreachable("Invalid EnvironmentType!");
+static Triple::ArchType parseBPFArch(StringRef ArchName) {
+ if (ArchName.equals("bpf")) {
+ if (sys::IsLittleEndianHost)
+ return Triple::bpfel;
+ else
+ return Triple::bpfeb;
+ } else if (ArchName.equals("bpf_be") || ArchName.equals("bpfeb")) {
+ return Triple::bpfeb;
+ } else if (ArchName.equals("bpf_le") || ArchName.equals("bpfel")) {
+ return Triple::bpfel;
+ } else {
+ return Triple::UnknownArch;
+ }
Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
+ Triple::ArchType BPFArch(parseBPFArch(Name));
return StringSwitch<Triple::ArchType>(Name)
.Case("aarch64", aarch64)
.Case("aarch64_be", aarch64_be)
.Case("arm64", aarch64) // "arm64" is an alias for "aarch64"
.Case("arm", arm)
.Case("armeb", armeb)
- .Case("bpf", bpf)
+ .StartsWith("bpf", BPFArch)
.Case("mips", mips)
.Case("mipsel", mipsel)
.Case("mips64", mips64)
@@ -296,6 +315,7 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
static Triple::ArchType parseArch(StringRef ArchName) {
Triple::ArchType ARMArch(parseARMArch(ArchName));
+ Triple::ArchType BPFArch(parseBPFArch(ArchName));
return StringSwitch<Triple::ArchType>(ArchName)
.Cases("i386", "i486", "i586", "i686", Triple::x86)
@@ -317,7 +337,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("mips64el", Triple::mips64el)
.Case("r600", Triple::r600)
.Case("amdgcn", Triple::amdgcn)
- .Case("bpf", Triple::bpf)
+ .StartsWith("bpf", BPFArch)
.Case("hexagon", Triple::hexagon)
.Case("s390x", Triple::systemz)
.Case("sparc", Triple::sparc)
@@ -702,6 +722,16 @@ std::string Triple::normalize(StringRef Str) {
// Special case logic goes here. At this point Arch, Vendor and OS have the
// correct values for the computed components.
+ std::string NormalizedEnvironment;
+ if (Environment == Triple::Android && Components[3].startswith("androideabi")) {
+ StringRef AndroidVersion = Components[3].drop_front(strlen("androideabi"));
+ if (AndroidVersion.empty()) {
+ Components[3] = "android";
+ } else {
+ NormalizedEnvironment = Twine("android", AndroidVersion).str();
+ Components[3] = NormalizedEnvironment;
+ }
+ }
if (OS == Triple::Win32) {
@@ -779,41 +809,47 @@ static unsigned EatNumber(StringRef &Str) {
return Result;
-void Triple::getOSVersion(unsigned &Major, unsigned &Minor,
- unsigned &Micro) const {
- StringRef OSName = getOSName();
- // For Android, we care about the Android version rather than the Linux
- // version.
- if (getEnvironment() == Android) {
- OSName = getEnvironmentName().substr(strlen("android"));
- if (OSName.startswith("eabi"))
- OSName = OSName.substr(strlen("eabi"));
- }
- // Assume that the OS portion of the triple starts with the canonical name.
- StringRef OSTypeName = getOSTypeName(getOS());
- if (OSName.startswith(OSTypeName))
- OSName = OSName.substr(OSTypeName.size());
+static void parseVersionFromName(StringRef Name, unsigned &Major,
+ unsigned &Minor, unsigned &Micro) {
// Any unset version defaults to 0.
Major = Minor = Micro = 0;
// Parse up to three components.
- unsigned *Components[3] = { &Major, &Minor, &Micro };
+ unsigned *Components[3] = {&Major, &Minor, &Micro};
for (unsigned i = 0; i != 3; ++i) {
- if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
+ if (Name.empty() || Name[0] < '0' || Name[0] > '9')
// Consume the leading number.
- *Components[i] = EatNumber(OSName);
+ *Components[i] = EatNumber(Name);
// Consume the separator, if present.
- if (OSName.startswith("."))
- OSName = OSName.substr(1);
+ if (Name.startswith("."))
+ Name = Name.substr(1);
+void Triple::getEnvironmentVersion(unsigned &Major, unsigned &Minor,
+ unsigned &Micro) const {
+ StringRef EnvironmentName = getEnvironmentName();
+ StringRef EnvironmentTypeName = getEnvironmentTypeName(getEnvironment());
+ if (EnvironmentName.startswith(EnvironmentTypeName))
+ EnvironmentName = EnvironmentName.substr(EnvironmentTypeName.size());
+ parseVersionFromName(EnvironmentName, Major, Minor, Micro);
+void Triple::getOSVersion(unsigned &Major, unsigned &Minor,
+ unsigned &Micro) const {
+ StringRef OSName = getOSName();
+ // Assume that the OS portion of the triple starts with the canonical name.
+ StringRef OSTypeName = getOSTypeName(getOS());
+ if (OSName.startswith(OSTypeName))
+ OSName = OSName.substr(OSTypeName.size());
+ parseVersionFromName(OSName, Major, Minor, Micro);
bool Triple::getMacOSXVersion(unsigned &Major, unsigned &Minor,
unsigned &Micro) const {
getOSVersion(Major, Minor, Micro);
@@ -973,7 +1009,8 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::amdgcn:
- case llvm::Triple::bpf:
+ case llvm::Triple::bpfel:
+ case llvm::Triple::bpfeb:
case llvm::Triple::le64:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
@@ -1010,7 +1047,8 @@ Triple Triple::get32BitArchVariant() const {
case Triple::aarch64:
case Triple::aarch64_be:
case Triple::amdgcn:
- case Triple::bpf:
+ case Triple::bpfel:
+ case Triple::bpfeb:
case Triple::msp430:
case Triple::systemz:
case Triple::ppc64le:
@@ -1074,7 +1112,8 @@ Triple Triple::get64BitArchVariant() const {
case Triple::aarch64:
case Triple::aarch64_be:
- case Triple::bpf:
+ case Triple::bpfel:
+ case Triple::bpfeb:
case Triple::le64:
case Triple::amdil64:
case Triple::amdgcn:
@@ -1108,13 +1147,13 @@ Triple Triple::get64BitArchVariant() const {
const char *Triple::getARMCPUForArch(StringRef MArch) const {
if (MArch.empty())
MArch = getArchName();
+ MArch = ARMTargetParser::getCanonicalArchName(MArch);
// Some defaults are forced.
switch (getOS()) {
case llvm::Triple::FreeBSD:
case llvm::Triple::NetBSD:
- // FIXME: This doesn't work on BE/thumb variants.
- if (MArch == "armv6")
+ if (!MArch.empty() && MArch == "v6")
return "arm1176jzf-s";
case llvm::Triple::Win32:
@@ -1124,7 +1163,6 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const {
- MArch = ARMTargetParser::getCanonicalArchName(MArch);
if (MArch.empty())
return nullptr;
diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp
index 90f34f6..6b59a16 100644
--- a/lib/Support/YAMLTraits.cpp
+++ b/lib/Support/YAMLTraits.cpp
@@ -97,6 +97,10 @@ bool Input::nextDocument() {
return ++DocIterator != Strm->end();
+const Node *Input::getCurrentNode() const {
+ return CurrentNode ? CurrentNode->_node : nullptr;
bool Input::mapTag(StringRef Tag, bool Default) {
std::string foundTag = CurrentNode->_node->getVerbatimTag();
if (foundTag.empty()) {
@@ -400,9 +404,10 @@ bool Input::canElideEmptySequence() {
// Output
-Output::Output(raw_ostream &yout, void *context)
+Output::Output(raw_ostream &yout, void *context, int WrapColumn)
: IO(context),
+ WrapColumn(WrapColumn),
@@ -525,7 +530,7 @@ void Output::endFlowSequence() {
bool Output::preflightFlowElement(unsigned, void *&) {
if (NeedFlowSequenceComma)
output(", ");
- if (Column > 70) {
+ if (WrapColumn && Column > WrapColumn) {
for (int i = 0; i < ColumnAtFlowStart; ++i)
output(" ");
@@ -716,7 +721,7 @@ void Output::paddedKey(StringRef key) {
void Output::flowKey(StringRef Key) {
if (StateStack.back() == inFlowMapOtherKey)
output(", ");
- if (Column > 70) {
+ if (WrapColumn && Column > WrapColumn) {
for (int I = 0; I < ColumnAtMapFlowStart; ++I)
output(" ");
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 4c0b6c7..42f830b 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -815,7 +815,7 @@ void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) {
uint64_t raw_svector_ostream::current_pos() const {
- return OS.size();
+ return OS.size();
StringRef raw_svector_ostream::str() {
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 9783922..97e796c 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -86,7 +86,6 @@ IntRecTy IntRecTy::Shared;
StringRecTy StringRecTy::Shared;
DagRecTy DagRecTy::Shared;
-void RecTy::anchor() { }
void RecTy::dump() const { print(errs()); }
ListRecTy *RecTy::getListTy() {
@@ -95,47 +94,15 @@ ListRecTy *RecTy::getListTy() {
return ListTy.get();
-bool RecTy::baseClassOf(const RecTy *RHS) const {
- assert (RHS && "NULL pointer");
+bool RecTy::typeIsConvertibleTo(const RecTy *RHS) const {
+ assert(RHS && "NULL pointer");
return Kind == RHS->getRecTyKind();
-Init *BitRecTy::convertValue(BitsInit *BI) {
- if (BI->getNumBits() != 1) return nullptr; // Only accept if just one bit!
- return BI->getBit(0);
-Init *BitRecTy::convertValue(IntInit *II) {
- int64_t Val = II->getValue();
- if (Val != 0 && Val != 1) return nullptr; // Only accept 0 or 1 for a bit!
- return BitInit::get(Val != 0);
-Init *BitRecTy::convertValue(TypedInit *TI) {
- RecTy *Ty = TI->getType();
- if (isa<BitRecTy>(Ty))
- return TI; // Accept variable if it is already of bit type!
- if (auto *BitsTy = dyn_cast<BitsRecTy>(Ty))
- // Accept only bits<1> expression.
- return BitsTy->getNumBits() == 1 ? TI : nullptr;
- // Ternary !if can be converted to bit, but only if both sides are
- // convertible to a bit.
- if (TernOpInit *TOI = dyn_cast<TernOpInit>(TI)) {
- if (TOI->getOpcode() != TernOpInit::TernaryOp::IF)
- return nullptr;
- if (!TOI->getMHS()->convertInitializerTo(BitRecTy::get()) ||
- !TOI->getRHS()->convertInitializerTo(BitRecTy::get()))
- return nullptr;
- return TOI;
- }
- return nullptr;
-bool BitRecTy::baseClassOf(const RecTy *RHS) const{
- if(RecTy::baseClassOf(RHS) || RHS->getRecTyKind() == IntRecTyKind)
+bool BitRecTy::typeIsConvertibleTo(const RecTy *RHS) const{
+ if (RecTy::typeIsConvertibleTo(RHS) || RHS->getRecTyKind() == IntRecTyKind)
return true;
- if(const BitsRecTy *BitsTy = dyn_cast<BitsRecTy>(RHS))
+ if (const BitsRecTy *BitsTy = dyn_cast<BitsRecTy>(RHS))
return BitsTy->getNumBits() == 1;
return false;
@@ -154,193 +121,34 @@ std::string BitsRecTy::getAsString() const {
return "bits<" + utostr(Size) + ">";
-Init *BitsRecTy::convertValue(UnsetInit *UI) {
- SmallVector<Init *, 16> NewBits(Size);
- for (unsigned i = 0; i != Size; ++i)
- NewBits[i] = UnsetInit::get();
- return BitsInit::get(NewBits);
-Init *BitsRecTy::convertValue(BitInit *BI) {
- if (Size != 1) return nullptr; // Can only convert single bit.
- return BitsInit::get(BI);
-/// canFitInBitfield - Return true if the number of bits is large enough to hold
-/// the integer value.
-static bool canFitInBitfield(int64_t Value, unsigned NumBits) {
- // For example, with NumBits == 4, we permit Values from [-7 .. 15].
- return (NumBits >= sizeof(Value) * 8) ||
- (Value >> NumBits == 0) || (Value >> (NumBits-1) == -1);
-/// convertValue from Int initializer to bits type: Split the integer up into the
-/// appropriate bits.
-Init *BitsRecTy::convertValue(IntInit *II) {
- int64_t Value = II->getValue();
- // Make sure this bitfield is large enough to hold the integer value.
- if (!canFitInBitfield(Value, Size))
- return nullptr;
- SmallVector<Init *, 16> NewBits(Size);
- for (unsigned i = 0; i != Size; ++i)
- NewBits[i] = BitInit::get(Value & (1LL << i));
- return BitsInit::get(NewBits);
-Init *BitsRecTy::convertValue(BitsInit *BI) {
- // If the number of bits is right, return it. Otherwise we need to expand or
- // truncate.
- if (BI->getNumBits() == Size) return BI;
- return nullptr;
-Init *BitsRecTy::convertValue(TypedInit *TI) {
- if (Size == 1 && isa<BitRecTy>(TI->getType()))
- return BitsInit::get(TI);
- if (TI->getType()->typeIsConvertibleTo(this)) {
- SmallVector<Init *, 16> NewBits(Size);
- for (unsigned i = 0; i != Size; ++i)
- NewBits[i] = VarBitInit::get(TI, i);
- return BitsInit::get(NewBits);
- }
- return nullptr;
-bool BitsRecTy::baseClassOf(const RecTy *RHS) const{
- if (RecTy::baseClassOf(RHS)) //argument and the receiver are the same type
+bool BitsRecTy::typeIsConvertibleTo(const RecTy *RHS) const {
+ if (RecTy::typeIsConvertibleTo(RHS)) //argument and the sender are same type
return cast<BitsRecTy>(RHS)->Size == Size;
RecTyKind kind = RHS->getRecTyKind();
return (kind == BitRecTyKind && Size == 1) || (kind == IntRecTyKind);
-Init *IntRecTy::convertValue(BitInit *BI) {
- return IntInit::get(BI->getValue());
-Init *IntRecTy::convertValue(BitsInit *BI) {
- int64_t Result = 0;
- for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i)
- if (BitInit *Bit = dyn_cast<BitInit>(BI->getBit(i)))
- Result |= static_cast<int64_t>(Bit->getValue()) << i;
- else
- return nullptr;
- return IntInit::get(Result);
-Init *IntRecTy::convertValue(TypedInit *TI) {
- if (TI->getType()->typeIsConvertibleTo(this))
- return TI; // Accept variable if already of the right type!
- return nullptr;
-bool IntRecTy::baseClassOf(const RecTy *RHS) const{
+bool IntRecTy::typeIsConvertibleTo(const RecTy *RHS) const {
RecTyKind kind = RHS->getRecTyKind();
return kind==BitRecTyKind || kind==BitsRecTyKind || kind==IntRecTyKind;
-Init *StringRecTy::convertValue(UnOpInit *UO) {
- if (UO->getOpcode() == UnOpInit::CAST) {
- Init *L = UO->getOperand()->convertInitializerTo(this);
- if (!L) return nullptr;
- if (L != UO->getOperand())
- return UnOpInit::get(UnOpInit::CAST, L, StringRecTy::get());
- return UO;
- }
- return convertValue((TypedInit*)UO);
-Init *StringRecTy::convertValue(BinOpInit *BO) {
- if (BO->getOpcode() == BinOpInit::STRCONCAT) {
- Init *L = BO->getLHS()->convertInitializerTo(this);
- Init *R = BO->getRHS()->convertInitializerTo(this);
- if (!L || !R) return nullptr;
- if (L != BO->getLHS() || R != BO->getRHS())
- return BinOpInit::get(BinOpInit::STRCONCAT, L, R, StringRecTy::get());
- return BO;
- }
- return convertValue((TypedInit*)BO);
-Init *StringRecTy::convertValue(TypedInit *TI) {
- if (isa<StringRecTy>(TI->getType()))
- return TI; // Accept variable if already of the right type!
- return nullptr;
+std::string StringRecTy::getAsString() const {
+ return "string";
std::string ListRecTy::getAsString() const {
return "list<" + Ty->getAsString() + ">";
-Init *ListRecTy::convertValue(ListInit *LI) {
- std::vector<Init*> Elements;
- // Verify that all of the elements of the list are subclasses of the
- // appropriate class!
- for (unsigned i = 0, e = LI->getSize(); i != e; ++i)
- if (Init *CI = LI->getElement(i)->convertInitializerTo(Ty))
- Elements.push_back(CI);
- else
- return nullptr;
- if (!isa<ListRecTy>(LI->getType()))
- return nullptr;
- return ListInit::get(Elements, this);
-Init *ListRecTy::convertValue(TypedInit *TI) {
- // Ensure that TI is compatible with our class.
- if (ListRecTy *LRT = dyn_cast<ListRecTy>(TI->getType()))
- if (LRT->getElementType()->typeIsConvertibleTo(getElementType()))
- return TI;
- return nullptr;
-bool ListRecTy::baseClassOf(const RecTy *RHS) const{
- if(const ListRecTy* ListTy = dyn_cast<ListRecTy>(RHS))
- return ListTy->getElementType()->typeIsConvertibleTo(Ty);
+bool ListRecTy::typeIsConvertibleTo(const RecTy *RHS) const {
+ if (const auto *ListTy = dyn_cast<ListRecTy>(RHS))
+ return Ty->typeIsConvertibleTo(ListTy->getElementType());
return false;
-Init *DagRecTy::convertValue(TypedInit *TI) {
- if (TI->getType()->typeIsConvertibleTo(this))
- return TI;
- return nullptr;
-Init *DagRecTy::convertValue(UnOpInit *UO) {
- if (UO->getOpcode() == UnOpInit::CAST) {
- Init *L = UO->getOperand()->convertInitializerTo(this);
- if (!L) return nullptr;
- if (L != UO->getOperand())
- return UnOpInit::get(UnOpInit::CAST, L, DagRecTy::get());
- return UO;
- }
- return nullptr;
-Init *DagRecTy::convertValue(BinOpInit *BO) {
- if (BO->getOpcode() == BinOpInit::CONCAT) {
- Init *L = BO->getLHS()->convertInitializerTo(this);
- Init *R = BO->getRHS()->convertInitializerTo(this);
- if (!L || !R) return nullptr;
- if (L != BO->getLHS() || R != BO->getRHS())
- return BinOpInit::get(BinOpInit::CONCAT, L, R, DagRecTy::get());
- return BO;
- }
- return nullptr;
+std::string DagRecTy::getAsString() const {
+ return "dag";
RecordRecTy *RecordRecTy::get(Record *R) {
@@ -351,33 +159,16 @@ std::string RecordRecTy::getAsString() const {
return Rec->getName();
-Init *RecordRecTy::convertValue(DefInit *DI) {
- // Ensure that DI is a subclass of Rec.
- if (!DI->getDef()->isSubClassOf(Rec))
- return nullptr;
- return DI;
-Init *RecordRecTy::convertValue(TypedInit *TI) {
- // Ensure that TI is compatible with Rec.
- if (RecordRecTy *RRT = dyn_cast<RecordRecTy>(TI->getType()))
- if (RRT->getRecord()->isSubClassOf(getRecord()) ||
- RRT->getRecord() == getRecord())
- return TI;
- return nullptr;
-bool RecordRecTy::baseClassOf(const RecTy *RHS) const{
+bool RecordRecTy::typeIsConvertibleTo(const RecTy *RHS) const {
const RecordRecTy *RTy = dyn_cast<RecordRecTy>(RHS);
if (!RTy)
return false;
- if (Rec == RTy->getRecord() || RTy->getRecord()->isSubClassOf(Rec))
+ if (RTy->getRecord() == Rec || Rec->isSubClassOf(RTy->getRecord()))
return true;
- const std::vector<Record*> &SC = Rec->getSuperClasses();
- for (unsigned i = 0, e = SC.size(); i != e; ++i)
- if (RTy->getRecord()->isSubClassOf(SC[i]))
+ for (Record *SC : RTy->getRecord()->getSuperClasses())
+ if (Rec->isSubClassOf(SC))
return true;
return false;
@@ -422,14 +213,24 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
void Init::anchor() { }
void Init::dump() const { return print(errs()); }
-void UnsetInit::anchor() { }
UnsetInit *UnsetInit::get() {
static UnsetInit TheInit;
return &TheInit;
-void BitInit::anchor() { }
+Init *UnsetInit::convertInitializerTo(RecTy *Ty) const {
+ if (auto *BRT = dyn_cast<BitsRecTy>(Ty)) {
+ SmallVector<Init *, 16> NewBits(BRT->getNumBits());
+ for (unsigned i = 0; i != BRT->getNumBits(); ++i)
+ NewBits[i] = UnsetInit::get();
+ return BitsInit::get(NewBits);
+ }
+ // All other types can just be returned.
+ return const_cast<UnsetInit *>(this);
BitInit *BitInit::get(bool V) {
static BitInit True(true);
@@ -438,6 +239,22 @@ BitInit *BitInit::get(bool V) {
return V ? &True : &False;
+Init *BitInit::convertInitializerTo(RecTy *Ty) const {
+ if (isa<BitRecTy>(Ty))
+ return const_cast<BitInit *>(this);
+ if (isa<IntRecTy>(Ty))
+ return IntInit::get(getValue());
+ if (auto *BRT = dyn_cast<BitsRecTy>(Ty)) {
+ // Can only convert single bit.
+ if (BRT->getNumBits() == 1)
+ return BitsInit::get(const_cast<BitInit *>(this));
+ }
+ return nullptr;
static void
ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef<Init *> Range) {
@@ -467,6 +284,32 @@ void BitsInit::Profile(FoldingSetNodeID &ID) const {
ProfileBitsInit(ID, Bits);
+Init *BitsInit::convertInitializerTo(RecTy *Ty) const {
+ if (isa<BitRecTy>(Ty)) {
+ if (getNumBits() != 1) return nullptr; // Only accept if just one bit!
+ return getBit(0);
+ }
+ if (auto *BRT = dyn_cast<BitsRecTy>(Ty)) {
+ // If the number of bits is right, return it. Otherwise we need to expand
+ // or truncate.
+ if (getNumBits() != BRT->getNumBits()) return nullptr;
+ return const_cast<BitsInit *>(this);
+ }
+ if (isa<IntRecTy>(Ty)) {
+ int64_t Result = 0;
+ for (unsigned i = 0, e = getNumBits(); i != e; ++i)
+ if (auto *Bit = dyn_cast<BitInit>(getBit(i)))
+ Result |= static_cast<int64_t>(Bit->getValue()) << i;
+ else
+ return nullptr;
+ return IntInit::get(Result);
+ }
+ return nullptr;
Init *
BitsInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
SmallVector<Init *, 16> NewBits(Bits.size());
@@ -560,6 +403,40 @@ std::string IntInit::getAsString() const {
return itostr(Value);
+/// canFitInBitfield - Return true if the number of bits is large enough to hold
+/// the integer value.
+static bool canFitInBitfield(int64_t Value, unsigned NumBits) {
+ // For example, with NumBits == 4, we permit Values from [-7 .. 15].
+ return (NumBits >= sizeof(Value) * 8) ||
+ (Value >> NumBits == 0) || (Value >> (NumBits-1) == -1);
+Init *IntInit::convertInitializerTo(RecTy *Ty) const {
+ if (isa<IntRecTy>(Ty))
+ return const_cast<IntInit *>(this);
+ if (isa<BitRecTy>(Ty)) {
+ int64_t Val = getValue();
+ if (Val != 0 && Val != 1) return nullptr; // Only accept 0 or 1 for a bit!
+ return BitInit::get(Val != 0);
+ }
+ if (auto *BRT = dyn_cast<BitsRecTy>(Ty)) {
+ int64_t Value = getValue();
+ // Make sure this bitfield is large enough to hold the integer value.
+ if (!canFitInBitfield(Value, BRT->getNumBits()))
+ return nullptr;
+ SmallVector<Init *, 16> NewBits(BRT->getNumBits());
+ for (unsigned i = 0; i != BRT->getNumBits(); ++i)
+ NewBits[i] = BitInit::get(Value & (1LL << i));
+ return BitsInit::get(NewBits);
+ }
+ return nullptr;
Init *
IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
SmallVector<Init *, 16> NewBits(Bits.size());
@@ -573,8 +450,6 @@ IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
return BitsInit::get(NewBits);
-void StringInit::anchor() { }
StringInit *StringInit::get(StringRef V) {
static StringMap<std::unique_ptr<StringInit>> ThePool;
@@ -583,6 +458,13 @@ StringInit *StringInit::get(StringRef V) {
return I.get();
+Init *StringInit::convertInitializerTo(RecTy *Ty) const {
+ if (isa<StringRecTy>(Ty))
+ return const_cast<StringInit *>(this);
+ return nullptr;
static void ProfileListInit(FoldingSetNodeID &ID,
ArrayRef<Init *> Range,
RecTy *EltTy) {
@@ -616,11 +498,30 @@ void ListInit::Profile(FoldingSetNodeID &ID) const {
ProfileListInit(ID, Values, EltTy);
+Init *ListInit::convertInitializerTo(RecTy *Ty) const {
+ if (auto *LRT = dyn_cast<ListRecTy>(Ty)) {
+ std::vector<Init*> Elements;
+ // Verify that all of the elements of the list are subclasses of the
+ // appropriate class!
+ for (Init *I : getValues())
+ if (Init *CI = I->convertInitializerTo(LRT->getElementType()))
+ Elements.push_back(CI);
+ else
+ return nullptr;
+ if (isa<ListRecTy>(getType()))
+ return ListInit::get(Elements, Ty);
+ }
+ return nullptr;
Init *
ListInit::convertInitListSlice(const std::vector<unsigned> &Elements) const {
std::vector<Init*> Vals;
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
- if (Elements[i] >= getSize())
+ if (Elements[i] >= size())
return nullptr;
@@ -637,12 +538,11 @@ Record *ListInit::getElementAsRecord(unsigned i) const {
Init *ListInit::resolveReferences(Record &R, const RecordVal *RV) const {
std::vector<Init*> Resolved;
- Resolved.reserve(getSize());
+ Resolved.reserve(size());
bool Changed = false;
- for (unsigned i = 0, e = getSize(); i != e; ++i) {
+ for (Init *CurElt : getValues()) {
Init *E;
- Init *CurElt = getElement(i);
do {
E = CurElt;
@@ -659,7 +559,7 @@ Init *ListInit::resolveReferences(Record &R, const RecordVal *RV) const {
Init *ListInit::resolveListElementReference(Record &R, const RecordVal *IRV,
unsigned Elt) const {
- if (Elt >= getSize())
+ if (Elt >= size())
return nullptr; // Out of range reference.
Init *E = getElement(Elt);
// If the element is set to some value, or if we are resolving a reference
@@ -717,12 +617,12 @@ UnOpInit *UnOpInit::get(UnaryOp opc, Init *lhs, RecTy *Type) {
Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
case CAST: {
- if (getType()->getAsString() == "string") {
+ if (isa<StringRecTy>(getType())) {
if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
return LHSs;
if (DefInit *LHSd = dyn_cast<DefInit>(LHS))
- return StringInit::get(LHSd->getDef()->getName());
+ return StringInit::get(LHSd->getAsString());
if (IntInit *LHSi = dyn_cast<IntInit>(LHS))
return StringInit::get(LHSi->getAsString());
@@ -987,11 +887,11 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
MultiClass *CurMultiClass) {
// If this is a dag, recurse
if (auto *TArg = dyn_cast<TypedInit>(Arg))
- if (TArg->getType()->getAsString() == "dag")
+ if (isa<DagRecTy>(TArg->getType()))
return ForeachHelper(LHS, Arg, RHSo, Type, CurRec, CurMultiClass);
std::vector<Init *> NewOperands;
- for (int i = 0; i < RHSo->getNumOperands(); ++i) {
+ for (unsigned i = 0; i < RHSo->getNumOperands(); ++i) {
if (auto *RHSoo = dyn_cast<OpInit>(RHSo->getOperand(i))) {
if (Init *Result = EvaluateOperation(RHSoo, LHS, Arg,
Type, CurRec, CurMultiClass))
@@ -1013,8 +913,6 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
Record *CurRec, MultiClass *CurMultiClass) {
- DagInit *MHSd = dyn_cast<DagInit>(MHS);
- ListInit *MHSl = dyn_cast<ListInit>(MHS);
OpInit *RHSo = dyn_cast<OpInit>(RHS);
@@ -1026,55 +924,52 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
if (!LHSt)
PrintFatalError(CurRec->getLoc(), "!foreach requires typed variable\n");
- if ((MHSd && isa<DagRecTy>(Type)) || (MHSl && isa<ListRecTy>(Type))) {
- if (MHSd) {
- Init *Val = MHSd->getOperator();
- Init *Result = EvaluateOperation(RHSo, LHS, Val,
- Type, CurRec, CurMultiClass);
- if (Result)
- Val = Result;
- std::vector<std::pair<Init *, std::string> > args;
- for (unsigned int i = 0; i < MHSd->getNumArgs(); ++i) {
- Init *Arg;
- std::string ArgName;
- Arg = MHSd->getArg(i);
- ArgName = MHSd->getArgName(i);
- // Process args
- Init *Result = EvaluateOperation(RHSo, LHS, Arg, Type,
- CurRec, CurMultiClass);
- if (Result)
- Arg = Result;
- // TODO: Process arg names
- args.push_back(std::make_pair(Arg, ArgName));
- }
- return DagInit::get(Val, "", args);
+ DagInit *MHSd = dyn_cast<DagInit>(MHS);
+ if (MHSd && isa<DagRecTy>(Type)) {
+ Init *Val = MHSd->getOperator();
+ if (Init *Result = EvaluateOperation(RHSo, LHS, Val,
+ Type, CurRec, CurMultiClass))
+ Val = Result;
+ std::vector<std::pair<Init *, std::string> > args;
+ for (unsigned int i = 0; i < MHSd->getNumArgs(); ++i) {
+ Init *Arg = MHSd->getArg(i);
+ std::string ArgName = MHSd->getArgName(i);
+ // Process args
+ if (Init *Result = EvaluateOperation(RHSo, LHS, Arg, Type,
+ CurRec, CurMultiClass))
+ Arg = Result;
+ // TODO: Process arg names
+ args.push_back(std::make_pair(Arg, ArgName));
- if (MHSl) {
- std::vector<Init *> NewOperands;
- std::vector<Init *> NewList(MHSl->begin(), MHSl->end());
- for (Init *&Item : NewList) {
- NewOperands.clear();
- for(int i = 0; i < RHSo->getNumOperands(); ++i) {
- // First, replace the foreach variable with the list item
- if (LHS->getAsString() == RHSo->getOperand(i)->getAsString())
- NewOperands.push_back(Item);
- else
- NewOperands.push_back(RHSo->getOperand(i));
- }
- // Now run the operator and use its result as the new list item
- const OpInit *NewOp = RHSo->clone(NewOperands);
- Init *NewItem = NewOp->Fold(CurRec, CurMultiClass);
- if (NewItem != NewOp)
- Item = NewItem;
+ return DagInit::get(Val, "", args);
+ }
+ ListInit *MHSl = dyn_cast<ListInit>(MHS);
+ if (MHSl && isa<ListRecTy>(Type)) {
+ std::vector<Init *> NewOperands;
+ std::vector<Init *> NewList(MHSl->begin(), MHSl->end());
+ for (Init *&Item : NewList) {
+ NewOperands.clear();
+ for(unsigned i = 0; i < RHSo->getNumOperands(); ++i) {
+ // First, replace the foreach variable with the list item
+ if (LHS->getAsString() == RHSo->getOperand(i)->getAsString())
+ NewOperands.push_back(Item);
+ else
+ NewOperands.push_back(RHSo->getOperand(i));
- return ListInit::get(NewList, MHSl->getType());
+ // Now run the operator and use its result as the new list item
+ const OpInit *NewOp = RHSo->clone(NewOperands);
+ Init *NewItem = NewOp->Fold(CurRec, CurMultiClass);
+ if (NewItem != NewOp)
+ Item = NewItem;
+ return ListInit::get(NewList, MHSl->getType());
return nullptr;
@@ -1196,6 +1091,82 @@ RecTy *TypedInit::getFieldType(const std::string &FieldName) const {
Init *
+TypedInit::convertInitializerTo(RecTy *Ty) const {
+ if (isa<IntRecTy>(Ty)) {
+ if (getType()->typeIsConvertibleTo(Ty))
+ return const_cast<TypedInit *>(this);
+ return nullptr;
+ }
+ if (isa<StringRecTy>(Ty)) {
+ if (isa<StringRecTy>(getType()))
+ return const_cast<TypedInit *>(this);
+ return nullptr;
+ }
+ if (isa<BitRecTy>(Ty)) {
+ // Accept variable if it is already of bit type!
+ if (isa<BitRecTy>(getType()))
+ return const_cast<TypedInit *>(this);
+ if (auto *BitsTy = dyn_cast<BitsRecTy>(getType())) {
+ // Accept only bits<1> expression.
+ if (BitsTy->getNumBits() == 1)
+ return const_cast<TypedInit *>(this);
+ return nullptr;
+ }
+ // Ternary !if can be converted to bit, but only if both sides are
+ // convertible to a bit.
+ if (const auto *TOI = dyn_cast<TernOpInit>(this)) {
+ if (TOI->getOpcode() == TernOpInit::TernaryOp::IF &&
+ TOI->getMHS()->convertInitializerTo(BitRecTy::get()) &&
+ TOI->getRHS()->convertInitializerTo(BitRecTy::get()))
+ return const_cast<TypedInit *>(this);
+ return nullptr;
+ }
+ return nullptr;
+ }
+ if (auto *BRT = dyn_cast<BitsRecTy>(Ty)) {
+ if (BRT->getNumBits() == 1 && isa<BitRecTy>(getType()))
+ return BitsInit::get(const_cast<TypedInit *>(this));
+ if (getType()->typeIsConvertibleTo(BRT)) {
+ SmallVector<Init *, 16> NewBits(BRT->getNumBits());
+ for (unsigned i = 0; i != BRT->getNumBits(); ++i)
+ NewBits[i] = VarBitInit::get(const_cast<TypedInit *>(this), i);
+ return BitsInit::get(NewBits);
+ }
+ return nullptr;
+ }
+ if (auto *DLRT = dyn_cast<ListRecTy>(Ty)) {
+ if (auto *SLRT = dyn_cast<ListRecTy>(getType()))
+ if (SLRT->getElementType()->typeIsConvertibleTo(DLRT->getElementType()))
+ return const_cast<TypedInit *>(this);
+ return nullptr;
+ }
+ if (auto *DRT = dyn_cast<DagRecTy>(Ty)) {
+ if (getType()->typeIsConvertibleTo(DRT))
+ return const_cast<TypedInit *>(this);
+ return nullptr;
+ }
+ if (auto *SRRT = dyn_cast<RecordRecTy>(Ty)) {
+ // Ensure that this is compatible with Rec.
+ if (RecordRecTy *DRRT = dyn_cast<RecordRecTy>(getType()))
+ if (DRRT->getRecord()->isSubClassOf(SRRT->getRecord()) ||
+ DRRT->getRecord() == SRRT->getRecord())
+ return const_cast<TypedInit *>(this);
+ return nullptr;
+ }
+ return nullptr;
+Init *
TypedInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
BitsRecTy *T = dyn_cast<BitsRecTy>(getType());
if (!T) return nullptr; // Cannot subscript a non-bits variable.
@@ -1267,7 +1238,7 @@ Init *VarInit::resolveListElementReference(Record &R,
if (!LI)
return VarListElementInit::get(cast<TypedInit>(RV->getValue()), Elt);
- if (Elt >= LI->getSize())
+ if (Elt >= LI->size())
return nullptr; // Out of range reference.
Init *E = LI->getElement(Elt);
// If the element is set to some value, or if we are resolving a reference
@@ -1324,6 +1295,13 @@ VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) {
return I.get();
+Init *VarBitInit::convertInitializerTo(RecTy *Ty) const {
+ if (isa<BitRecTy>(Ty))
+ return const_cast<VarBitInit *>(this);
+ return nullptr;
std::string VarBitInit::getAsString() const {
return TI->getAsString() + "{" + utostr(Bit) + "}";
@@ -1371,8 +1349,8 @@ Init *VarListElementInit:: resolveListElementReference(Record &R,
unsigned Elt) const {
if (Init *Result = TI->resolveListElementReference(R, RV, Element)) {
if (TypedInit *TInit = dyn_cast<TypedInit>(Result)) {
- Init *Result2 = TInit->resolveListElementReference(R, RV, Elt);
- if (Result2) return Result2;
+ if (Init *Result2 = TInit->resolveListElementReference(R, RV, Elt))
+ return Result2;
return VarListElementInit::get(TInit, Elt);
return Result;
@@ -1385,6 +1363,13 @@ DefInit *DefInit::get(Record *R) {
return R->getDefInit();
+Init *DefInit::convertInitializerTo(RecTy *Ty) const {
+ if (auto *RRT = dyn_cast<RecordRecTy>(Ty))
+ if (getDef()->isSubClassOf(RRT->getRecord()))
+ return const_cast<DefInit *>(this);
+ return nullptr;
RecTy *DefInit::getFieldType(const std::string &FieldName) const {
if (const RecordVal *RV = Def->getValue(FieldName))
return RV->getType();
@@ -1422,7 +1407,7 @@ Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV,
unsigned Elt) const {
if (Init *ListVal = Rec->getFieldInit(R, RV, FieldName))
if (ListInit *LI = dyn_cast<ListInit>(ListVal)) {
- if (Elt >= LI->getSize()) return nullptr;
+ if (Elt >= LI->size()) return nullptr;
Init *E = LI->getElement(Elt);
// If the element is set to some value, or if we are resolving a
@@ -1501,6 +1486,13 @@ void DagInit::Profile(FoldingSetNodeID &ID) const {
ProfileDagInit(ID, Val, ValName, Args, ArgNames);
+Init *DagInit::convertInitializerTo(RecTy *Ty) const {
+ if (isa<DagRecTy>(Ty))
+ return const_cast<DagInit *>(this);
+ return nullptr;
Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const {
std::vector<Init*> NewArgs;
for (unsigned i = 0, e = Args.size(); i != e; ++i)
@@ -1535,20 +1527,20 @@ std::string DagInit::getAsString() const {
// Other implementations
-RecordVal::RecordVal(Init *N, RecTy *T, unsigned P)
- : Name(N), Ty(T), Prefix(P) {
- Value = Ty->convertValue(UnsetInit::get());
+RecordVal::RecordVal(Init *N, RecTy *T, bool P)
+ : NameAndPrefix(N, P), Ty(T) {
+ Value = UnsetInit::get()->convertInitializerTo(Ty);
assert(Value && "Cannot create unset value for current type!");
-RecordVal::RecordVal(const std::string &N, RecTy *T, unsigned P)
- : Name(StringInit::get(N)), Ty(T), Prefix(P) {
- Value = Ty->convertValue(UnsetInit::get());
+RecordVal::RecordVal(const std::string &N, RecTy *T, bool P)
+ : NameAndPrefix(StringInit::get(N), P), Ty(T) {
+ Value = UnsetInit::get()->convertInitializerTo(Ty);
assert(Value && "Cannot create unset value for current type!");
const std::string &RecordVal::getName() const {
- return cast<StringInit>(Name)->getValue();
+ return cast<StringInit>(getNameInit())->getValue();
void RecordVal::dump() const { errs() << *this; }
@@ -1577,8 +1569,7 @@ void Record::init() {
void Record::checkName() {
// Ensure the record name has string type.
const TypedInit *TypedName = cast<const TypedInit>(Name);
- RecTy *Type = TypedName->getType();
- if (!isa<StringRecTy>(Type))
+ if (!isa<StringRecTy>(TypedName->getType()))
PrintFatalError(getLoc(), "Record name is not a string!");
@@ -1649,9 +1640,11 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
const std::vector<Init *> &TArgs = R.getTemplateArgs();
if (!TArgs.empty()) {
OS << "<";
- for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
- if (i) OS << ", ";
- const RecordVal *RV = R.getValue(TArgs[i]);
+ bool NeedComma = false;
+ for (const Init *TA : TArgs) {
+ if (NeedComma) OS << ", ";
+ NeedComma = true;
+ const RecordVal *RV = R.getValue(TA);
assert(RV && "Template argument record not found??");
RV->print(OS, false);
@@ -1662,18 +1655,17 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
const std::vector<Record*> &SC = R.getSuperClasses();
if (!SC.empty()) {
OS << "\t//";
- for (unsigned i = 0, e = SC.size(); i != e; ++i)
- OS << " " << SC[i]->getNameInitAsString();
+ for (const Record *Super : SC)
+ OS << " " << Super->getNameInitAsString();
OS << "\n";
- const std::vector<RecordVal> &Vals = R.getValues();
- for (unsigned i = 0, e = Vals.size(); i != e; ++i)
- if (Vals[i].getPrefix() && !R.isTemplateArg(Vals[i].getName()))
- OS << Vals[i];
- for (unsigned i = 0, e = Vals.size(); i != e; ++i)
- if (!Vals[i].getPrefix() && !R.isTemplateArg(Vals[i].getName()))
- OS << Vals[i];
+ for (const RecordVal &Val : R.getValues())
+ if (Val.getPrefix() && !R.isTemplateArg(Val.getName()))
+ OS << Val;
+ for (const RecordVal &Val : R.getValues())
+ if (!Val.getPrefix() && !R.isTemplateArg(Val.getName()))
+ OS << Val;
return OS << "}\n";
@@ -1746,8 +1738,8 @@ std::vector<Record*>
Record::getValueAsListOfDefs(StringRef FieldName) const {
ListInit *List = getValueAsListInit(FieldName);
std::vector<Record*> Defs;
- for (unsigned i = 0; i < List->getSize(); i++) {
- if (DefInit *DI = dyn_cast<DefInit>(List->getElement(i)))
+ for (Init *I : List->getValues()) {
+ if (DefInit *DI = dyn_cast<DefInit>(I))
PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
@@ -1780,8 +1772,8 @@ std::vector<int64_t>
Record::getValueAsListOfInts(StringRef FieldName) const {
ListInit *List = getValueAsListInit(FieldName);
std::vector<int64_t> Ints;
- for (unsigned i = 0; i < List->getSize(); i++) {
- if (IntInit *II = dyn_cast<IntInit>(List->getElement(i)))
+ for (Init *I : List->getValues()) {
+ if (IntInit *II = dyn_cast<IntInit>(I))
PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
@@ -1798,9 +1790,9 @@ std::vector<std::string>
Record::getValueAsListOfStrings(StringRef FieldName) const {
ListInit *List = getValueAsListInit(FieldName);
std::vector<std::string> Strings;
- for (unsigned i = 0; i < List->getSize(); i++) {
- if (StringInit *II = dyn_cast<StringInit>(List->getElement(i)))
- Strings.push_back(II->getValue());
+ for (Init *I : List->getValues()) {
+ if (StringInit *SI = dyn_cast<StringInit>(I))
+ Strings.push_back(SI->getValue());
PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
FieldName + "' does not have a list of strings initializer!");
@@ -1922,23 +1914,23 @@ Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass,
RecTy *Type = cast<TypedInit>(Name)->getType();
BinOpInit *NewName =
- BinOpInit::get(BinOpInit::STRCONCAT,
- BinOpInit::get(BinOpInit::STRCONCAT,
- CurRec.getNameInit(),
- StringInit::get(Scoper),
- Type)->Fold(&CurRec, CurMultiClass),
- Name,
- Type);
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ CurRec.getNameInit(),
+ StringInit::get(Scoper),
+ Type)->Fold(&CurRec, CurMultiClass),
+ Name,
+ Type);
if (CurMultiClass && Scoper != "::") {
NewName =
- BinOpInit::get(BinOpInit::STRCONCAT,
- BinOpInit::get(BinOpInit::STRCONCAT,
- CurMultiClass->Rec.getNameInit(),
- StringInit::get("::"),
- Type)->Fold(&CurRec, CurMultiClass),
- NewName->Fold(&CurRec, CurMultiClass),
- Type);
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ CurMultiClass->Rec.getNameInit(),
+ StringInit::get("::"),
+ Type)->Fold(&CurRec, CurMultiClass),
+ NewName->Fold(&CurRec, CurMultiClass),
+ Type);
return NewName->Fold(&CurRec, CurMultiClass);
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index 0e654f9..15df25a 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -118,7 +118,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
for (unsigned i = 0, e = BitList.size(); i != e; ++i) {
unsigned Bit = BitList[i];
if (NewBits[Bit])
- return Error(Loc, "Cannot set bit #" + utostr(Bit) + " of value '" +
+ return Error(Loc, "Cannot set bit #" + Twine(Bit) + " of value '" +
ValName->getAsUnquotedString() + "' more than once");
NewBits[Bit] = BInit->getBit(i);
@@ -148,9 +148,8 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
Record *SC = SubClass.Rec;
// Add all of the values in the subclass into the current class.
- const std::vector<RecordVal> &Vals = SC->getValues();
- for (unsigned i = 0, e = Vals.size(); i != e; ++i)
- if (AddValue(CurRec, SubClass.RefRange.Start, Vals[i]))
+ for (const RecordVal &Val : SC->getValues())
+ if (AddValue(CurRec, SubClass.RefRange.Start, Val))
return true;
const std::vector<Init *> &TArgs = SC->getTemplateArgs();
@@ -178,7 +177,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
} else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) {
return Error(SubClass.RefRange.Start,
"Value not specified for template argument #" +
- utostr(i) + " (" + TArgs[i]->getAsUnquotedString() +
+ Twine(i) + " (" + TArgs[i]->getAsUnquotedString() +
") of subclass '" + SC->getNameInitAsString() + "'!");
@@ -272,7 +271,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
} else if (!CurRec->getValue(SMCTArgs[i])->getValue()->isComplete()) {
return Error(SubMultiClass.RefRange.Start,
"Value not specified for template argument #" +
- utostr(i) + " (" + SMCTArgs[i]->getAsUnquotedString() +
+ Twine(i) + " (" + SMCTArgs[i]->getAsUnquotedString() +
") of subclass '" + SMC->Rec.getNameInitAsString() + "'!");
@@ -309,7 +308,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
// Process each value.
- for (int64_t i = 0; i < List->getSize(); ++i) {
+ for (unsigned i = 0; i < List->size(); ++i) {
Init *ItemVal = List->resolveListElementReference(*CurRec, nullptr, i);
IterVals.push_back(IterRecord(CurLoop.IterVar, ItemVal));
if (ProcessForeachDefs(CurRec, Loc, IterVals))
@@ -325,9 +324,9 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
auto IterRec = make_unique<Record>(*CurRec);
// Set the iterator values now.
- for (unsigned i = 0, e = IterVals.size(); i != e; ++i) {
- VarInit *IterVar = IterVals[i].IterVar;
- TypedInit *IVal = dyn_cast<TypedInit>(IterVals[i].IterValue);
+ for (IterRecord &IR : IterVals) {
+ VarInit *IterVar = IR.IterVar;
+ TypedInit *IVal = dyn_cast<TypedInit>(IR.IterValue);
if (!IVal)
return Error(Loc, "foreach iterator value is untyped");
@@ -1296,7 +1295,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
// All other values must be convertible to just a single bit.
Init *Bit = Vals[i]->convertInitializerTo(BitRecTy::get());
if (!Bit) {
- Error(BraceLoc, "Element #" + utostr(i) + " (" + Vals[i]->getAsString()+
+ Error(BraceLoc, "Element #" + Twine(i) + " (" + Vals[i]->getAsString() +
") is not convertable to a bit");
return nullptr;
@@ -1315,11 +1314,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
if (ItemType) {
ListRecTy *ListType = dyn_cast<ListRecTy>(ItemType);
if (!ListType) {
- std::string s;
- raw_string_ostream ss(s);
- ss << "Type mismatch for list, expected list type, got "
- << ItemType->getAsString();
- TokError(ss.str());
+ TokError(Twine("Type mismatch for list, expected list type, got ") +
+ ItemType->getAsString());
return nullptr;
GivenListTy = ListType;
@@ -1604,7 +1600,7 @@ TGParser::ParseDagArgList(Record *CurRec) {
// DagArg ::= VARNAME
if (Lex.getCode() == tgtok::VarName) {
// A missing value is treated like '?'.
- Result.push_back(std::make_pair(UnsetInit::get(), Lex.getCurStrVal()));
+ Result.emplace_back(UnsetInit::get(), Lex.getCurStrVal());
} else {
// DagArg ::= Value (':' VARNAME)?
@@ -1810,8 +1806,8 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) {
assert(!IterType && "Type already initialized?");
IterType = IntRecTy::get();
std::vector<Init*> Values;
- for (unsigned i = 0, e = Ranges.size(); i != e; ++i)
- Values.push_back(IntInit::get(Ranges[i]));
+ for (unsigned R : Ranges)
+ Values.push_back(IntInit::get(R));
ForeachListValue = ListInit::get(Values, IterType);
@@ -1937,10 +1933,9 @@ bool TGParser::ParseBody(Record *CurRec) {
/// \brief Apply the current let bindings to \a CurRec.
/// \returns true on error, false otherwise.
bool TGParser::ApplyLetStack(Record *CurRec) {
- for (unsigned i = 0, e = LetStack.size(); i != e; ++i)
- for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j)
- if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name,
- LetStack[i][j].Bits, LetStack[i][j].Value))
+ for (std::vector<LetRecord> &LetInfo : LetStack)
+ for (LetRecord &LR : LetInfo)
+ if (SetValue(CurRec, LR.Loc, LR.Name, LR.Bits, LR.Value))
return true;
return false;
@@ -2177,7 +2172,7 @@ std::vector<LetRecord> TGParser::ParseLetList() {
if (!Val) return std::vector<LetRecord>();
// Now that we have everything, add the record.
- Result.push_back(LetRecord(Name, Bits, Val, NameLoc));
+ Result.emplace_back(std::move(Name), std::move(Bits), Val, NameLoc);
if (Lex.getCode() != tgtok::comma)
return Result;
@@ -2468,7 +2463,7 @@ bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC,
} else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) {
return Error(SubClassLoc, "value not specified for template argument #" +
- utostr(i) + " (" + TArgs[i]->getAsUnquotedString() +
+ Twine(i) + " (" + TArgs[i]->getAsUnquotedString() +
") of multiclassclass '" + MC.Rec.getNameInitAsString() +
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index a0a09e4..da22d8d 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -206,7 +206,7 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
// FIXME: Can we get anything other than a plain symbol here?
assert(!MO.getTargetFlags() && "Unknown operand target flag!");
- O << *Sym;
+ Sym->print(O, MAI);
printOffset(MO.getOffset(), O);
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 78a2021..1ea4abc 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -156,6 +156,9 @@ public:
SDNode *SelectLIBM(SDNode *N);
+ SDNode *SelectReadRegister(SDNode *N);
+ SDNode *SelectWriteRegister(SDNode *N);
// Include the pieces autogenerated from the target description.
#include ""
@@ -2114,6 +2117,120 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
return true;
+// Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
+// of the string, obtains the integer values from them and combines these
+// into a single value to be used in the MRS/MSR instruction.
+static int getIntOperandFromRegisterString(StringRef RegString) {
+ SmallVector<StringRef, 5> Fields;
+ RegString.split(Fields, ":");
+ if (Fields.size() == 1)
+ return -1;
+ assert(Fields.size() == 5
+ && "Invalid number of fields in read register string");
+ SmallVector<int, 5> Ops;
+ bool AllIntFields = true;
+ for (StringRef Field : Fields) {
+ unsigned IntField;
+ AllIntFields &= !Field.getAsInteger(10, IntField);
+ Ops.push_back(IntField);
+ }
+ assert(AllIntFields &&
+ "Unexpected non-integer value in special register string.");
+ // Need to combine the integer fields of the string into a single value
+ // based on the bit encoding of MRS/MSR instruction.
+ return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
+ (Ops[3] << 3) | (Ops[4]);
+// Lower the read_register intrinsic to an MRS instruction node if the special
+// register string argument is either of the form detailed in the ACLE (the
+// form described in getIntOperandFromRegisterString) or is a named register
+// known by the MRS SysReg mapper.
+SDNode *AArch64DAGToDAGISel::SelectReadRegister(SDNode *N) {
+ const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
+ const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ SDLoc DL(N);
+ int Reg = getIntOperandFromRegisterString(RegString->getString());
+ if (Reg != -1)
+ return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0),
+ MVT::Other,
+ CurDAG->getTargetConstant(Reg, DL, MVT::i32),
+ N->getOperand(0));
+ // Use the sysreg mapper to map the remaining possible strings to the
+ // value for the register to be used for the instruction operand.
+ AArch64SysReg::MRSMapper mapper;
+ bool IsValidSpecialReg;
+ Reg = mapper.fromString(RegString->getString(),
+ Subtarget->getFeatureBits(),
+ IsValidSpecialReg);
+ if (IsValidSpecialReg)
+ return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0),
+ MVT::Other,
+ CurDAG->getTargetConstant(Reg, DL, MVT::i32),
+ N->getOperand(0));
+ return nullptr;
+// Lower the write_register intrinsic to an MSR instruction node if the special
+// register string argument is either of the form detailed in the ACLE (the
+// form described in getIntOperandFromRegisterString) or is a named register
+// known by the MSR SysReg mapper.
+SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) {
+ const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
+ const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ SDLoc DL(N);
+ int Reg = getIntOperandFromRegisterString(RegString->getString());
+ if (Reg != -1)
+ return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
+ CurDAG->getTargetConstant(Reg, DL, MVT::i32),
+ N->getOperand(2), N->getOperand(0));
+ // Check if the register was one of those allowed as the pstatefield value in
+ // the MSR (immediate) instruction. To accept the values allowed in the
+ // pstatefield for the MSR (immediate) instruction, we also require that an
+ // immediate value has been provided as an argument; we know that this is
+ // the case as it has been ensured by semantic checking.
+ AArch64PState::PStateMapper PMapper;
+ bool IsValidSpecialReg;
+ Reg = PMapper.fromString(RegString->getString(),
+ Subtarget->getFeatureBits(),
+ IsValidSpecialReg);
+ if (IsValidSpecialReg) {
+ assert (isa<ConstantSDNode>(N->getOperand(2))
+ && "Expected a constant integer expression.");
+ uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ return CurDAG->getMachineNode(AArch64::MSRpstate, DL, MVT::Other,
+ CurDAG->getTargetConstant(Reg, DL, MVT::i32),
+ CurDAG->getTargetConstant(Immed, DL, MVT::i16),
+ N->getOperand(0));
+ }
+ // Use the sysreg mapper to attempt to map the remaining possible strings
+ // to the value for the register to be used for the MSR (register)
+ // instruction operand.
+ AArch64SysReg::MSRMapper Mapper;
+ Reg = Mapper.fromString(RegString->getString(),
+ Subtarget->getFeatureBits(),
+ IsValidSpecialReg);
+ if (IsValidSpecialReg)
+ return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
+ CurDAG->getTargetConstant(Reg, DL, MVT::i32),
+ N->getOperand(2), N->getOperand(0));
+ return nullptr;
SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
// Dump information about the Node being selected
DEBUG(errs() << "Selecting: ");
@@ -2135,6 +2252,16 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
+ if (SDNode *Res = SelectReadRegister(Node))
+ return Res;
+ break;
+ if (SDNode *Res = SelectWriteRegister(Node))
+ return Res;
+ break;
case ISD::ADD:
if (SDNode *I = SelectMLAV64LaneV128(Node))
return I;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index e6108c3..1616ff1 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -76,6 +76,9 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
+/// Value type used for condition codes.
+static const MVT MVT_CC = MVT::i32;
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -807,6 +810,9 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
+ case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
+ case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
+ case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
case AArch64ISD::FMIN: return "AArch64ISD::FMIN";
case AArch64ISD::FMAX: return "AArch64ISD::FMAX";
@@ -1165,10 +1171,133 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
LHS = LHS.getOperand(0);
- return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS)
+ return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
+static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue CCOp,
+ SDValue Condition, unsigned NZCV,
+ SDLoc DL, SelectionDAG &DAG) {
+ unsigned Opcode = 0;
+ if (LHS.getValueType().isFloatingPoint())
+ Opcode = AArch64ISD::FCCMP;
+ else if (RHS.getOpcode() == ISD::SUB) {
+ SDValue SubOp0 = RHS.getOperand(0);
+ if (const ConstantSDNode *SubOp0C = dyn_cast<ConstantSDNode>(SubOp0))
+ if (SubOp0C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ // See emitComparison() on why we can only do this for SETEQ and SETNE.
+ Opcode = AArch64ISD::CCMN;
+ RHS = RHS.getOperand(1);
+ }
+ }
+ if (Opcode == 0)
+ Opcode = AArch64ISD::CCMP;
+ SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
+ return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
+/// Returns true if @p Val is a tree of AND/OR/SETCC operations.
+static bool isConjunctionDisjunctionTree(const SDValue Val, unsigned Depth) {
+ if (!Val.hasOneUse())
+ return false;
+ if (Val->getOpcode() == ISD::SETCC)
+ return true;
+ // Protect against stack overflow.
+ if (Depth > 1000)
+ return false;
+ if (Val->getOpcode() == ISD::AND || Val->getOpcode() == ISD::OR) {
+ SDValue O0 = Val->getOperand(0);
+ SDValue O1 = Val->getOperand(1);
+ return isConjunctionDisjunctionTree(O0, Depth+1) &&
+ isConjunctionDisjunctionTree(O1, Depth+1);
+ }
+ return false;
+/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
+/// of CCMP/FCCMP ops. For example (SETCC_0 & SETCC_1) with conditions cond_0
+/// and cond_1 can be transformed into "CMP; CCMP" with CCMP executing on
+/// cond_0 and setting flags to inverted(cond_1) otherwise.
+/// This recursive function produces DAG nodes that produce condition flags
+/// suitable to determine the truth value of @p Val (which is AND/OR/SETCC)
+/// by testing the result for the condition set to @p OutCC. If @p Negate is
+/// set the opposite truth value is produced. If @p CCOp and @p Condition are
+/// given then conditional comparisons are created so that false is reported
+/// when they are false.
+static SDValue emitConjunctionDisjunctionTree(
+ SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate,
+ SDValue CCOp = SDValue(), AArch64CC::CondCode Condition = AArch64CC::AL) {
+ assert(isConjunctionDisjunctionTree(Val, 0));
+ // We're at a tree leaf, produce a c?f?cmp.
+ unsigned Opcode = Val->getOpcode();
+ if (Opcode == ISD::SETCC) {
+ SDValue LHS = Val->getOperand(0);
+ SDValue RHS = Val->getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
+ bool isInteger = LHS.getValueType().isInteger();
+ if (Negate)
+ CC = getSetCCInverse(CC, isInteger);
+ SDLoc DL(Val);
+ // Determine OutCC and handle FP special case.
+ if (isInteger) {
+ OutCC = changeIntCCToAArch64CC(CC);
+ } else {
+ assert(LHS.getValueType().isFloatingPoint());
+ AArch64CC::CondCode ExtraCC;
+ changeFPCCToAArch64CC(CC, OutCC, ExtraCC);
+ // Surprisingly some floating point conditions can't be tested with a
+ // single condition code. Construct an additional comparison in this case.
+ // See comment below on how we deal with OR conditions.
+ if (ExtraCC != AArch64CC::AL) {
+ SDValue ExtraCmp;
+ if (!CCOp.getNode())
+ ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
+ else {
+ SDValue ConditionOp = DAG.getConstant(Condition, DL, MVT_CC);
+ // Note that we want the inverse of ExtraCC, so NZCV is not inverted.
+ unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC);
+ ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp,
+ }
+ CCOp = ExtraCmp;
+ Condition = AArch64CC::getInvertedCondCode(ExtraCC);
+ OutCC = AArch64CC::getInvertedCondCode(OutCC);
+ }
+ }
+ // Produce a normal comparison if we are first in the chain
+ if (!CCOp.getNode())
+ return emitComparison(LHS, RHS, CC, DL, DAG);
+ // Otherwise produce a ccmp.
+ SDValue ConditionOp = DAG.getConstant(Condition, DL, MVT_CC);
+ AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
+ unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
+ return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL,
+ DAG);
+ }
+ // Construct comparison sequence for the left hand side.
+ SDValue LHS = Val->getOperand(0);
+ SDValue RHS = Val->getOperand(1);
+ // We can only implement AND-like behaviour here, but negation is free. So we
+ // use (not (and (not x) (not y))) to implement (or x y).
+ bool isOr = Val->getOpcode() == ISD::OR;
+ assert((isOr || Val->getOpcode() == ISD::AND) && "Should have AND or OR.");
+ Negate ^= isOr;
+ AArch64CC::CondCode RHSCC;
+ SDValue CmpR =
+ emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, isOr, CCOp, Condition);
+ SDValue CmpL =
+ emitConjunctionDisjunctionTree(DAG, LHS, OutCC, isOr, CmpR, RHSCC);
+ if (Negate)
+ OutCC = AArch64CC::getInvertedCondCode(OutCC);
+ return CmpL;
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) {
SDValue Cmp;
@@ -1227,47 +1356,55 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
- // For the i8 operand, the largest immediate is 255, so this can be easily
- // encoded in the compare instruction. For the i16 operand, however, the
- // largest immediate cannot be encoded in the compare.
- // Therefore, use a sign extending load and cmn to avoid materializing the -1
- // constant. For example,
- // movz w1, #65535
- // ldrh w0, [x0, #0]
- // cmp w0, w1
- // >
- // ldrsh w0, [x0, #0]
- // cmn w0, #1
- // Fundamental, we're relying on the property that (zext LHS) == (zext RHS)
- // if and only if (sext LHS) == (sext RHS). The checks are in place to ensure
- // both the LHS and RHS are truely zero extended and to make sure the
- // transformation is profitable.
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
- if ((cast<ConstantSDNode>(RHS)->getZExtValue() >> 16 == 0) &&
- isa<LoadSDNode>(LHS)) {
- if (cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
- cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
- LHS.getNode()->hasNUsesOfValue(1, 0)) {
- int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
- if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
- SDValue SExt =
- DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
- DAG.getValueType(MVT::i16));
- Cmp = emitComparison(SExt,
- DAG.getConstant(ValueofRHS, dl,
- RHS.getValueType()),
- CC, dl, DAG);
- AArch64CC = changeIntCCToAArch64CC(CC);
- AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
- return Cmp;
- }
+ const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
+ // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
+ // For the i8 operand, the largest immediate is 255, so this can be easily
+ // encoded in the compare instruction. For the i16 operand, however, the
+ // largest immediate cannot be encoded in the compare.
+ // Therefore, use a sign extending load and cmn to avoid materializing the
+ // -1 constant. For example,
+ // movz w1, #65535
+ // ldrh w0, [x0, #0]
+ // cmp w0, w1
+ // >
+ // ldrsh w0, [x0, #0]
+ // cmn w0, #1
+ // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
+ // if and only if (sext LHS) == (sext RHS). The checks are in place to
+ // ensure both the LHS and RHS are truly zero extended and to make sure the
+ // transformation is profitable.
+ if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
+ cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
+ cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
+ LHS.getNode()->hasNUsesOfValue(1, 0)) {
+ int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
+ SDValue SExt =
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
+ DAG.getValueType(MVT::i16));
+ Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
+ RHS.getValueType()),
+ CC, dl, DAG);
+ AArch64CC = changeIntCCToAArch64CC(CC);
+ goto CreateCCNode;
+ if ((RHSC->isNullValue() || RHSC->isOne()) &&
+ isConjunctionDisjunctionTree(LHS, 0)) {
+ bool Negate = (CC == ISD::SETNE) ^ RHSC->isNullValue();
+ Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC, Negate);
+ goto CreateCCNode;
+ }
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
- AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
+ AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
return Cmp;
@@ -4065,7 +4202,8 @@ unsigned AArch64TargetLowering::getRegisterByName(const char* RegName,
if (Reg)
return Reg;
- report_fatal_error("Invalid register name global variable");
+ report_fatal_error(Twine("Invalid register name \""
+ + StringRef(RegName) + "\"."));
SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
@@ -6741,7 +6879,8 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
// AArch64 has five basic addressing modes:
// reg
// reg + 9-bit signed offset
@@ -6792,7 +6931,8 @@ bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
// Scaling factors are not free at all.
// Operands | Rt Latency
// -------------------------------------------
@@ -6800,7 +6940,7 @@ int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM,
// -------------------------------------------
// Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
// Rt, [Xn, Wm, <extend> #imm] |
- if (isLegalAddressingMode(AM, Ty))
+ if (isLegalAddressingMode(AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1 if
// it is not equal to 0 or 1.
return AM.Scale != 0 && AM.Scale != 1;
@@ -9120,3 +9260,8 @@ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
return Ty->isArrayTy();
+bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
+ EVT) const {
+ return false;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 0d9b8b7..db192c7 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -58,6 +58,11 @@ enum NodeType : unsigned {
+ // Conditional compares. Operands: left,right,falsecc,cc,flags
// Floating point comparison
@@ -314,14 +319,16 @@ public:
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
/// \brief Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
/// of the specified type.
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns a negative value.
- int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override;
+ int getScalingFactorCost(const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
@@ -506,6 +513,8 @@ private:
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
CallingConv::ID CallConv,
bool isVarArg) const override;
+ bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
namespace AArch64 {
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 3b8b668..1fe9c7f 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -525,6 +525,13 @@ def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
let ParserMatchClass = Imm0_31Operand;
+// True if the 32-bit immediate is in the range [0,31]
+def imm32_0_31 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 32;
+}]> {
+ let ParserMatchClass = Imm0_31Operand;
// imm0_15 predicate - True if the immediate is in the range [0,15]
def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
return ((uint64_t)Imm) < 16;
@@ -542,7 +549,9 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
// imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 16;
+}]> {
+ let ParserMatchClass = Imm0_15Operand;
// An arithmetic shifter operand:
// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr
@@ -2068,9 +2077,12 @@ multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic,
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm>
- : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond),
- asm, "\t$Rn, $imm, $nzcv, $cond", "", []>,
+class BaseCondComparisonImm<bit op, RegisterClass regtype, ImmLeaf immtype,
+ string mnemonic, SDNode OpNode>
+ : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond),
+ mnemonic, "\t$Rn, $imm, $nzcv, $cond", "",
+ [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]>,
Sched<[WriteI, ReadI]> {
let Uses = [NZCV];
let Defs = [NZCV];
@@ -2090,19 +2102,13 @@ class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm>
let Inst{3-0} = nzcv;
-multiclass CondSetFlagsImm<bit op, string asm> {
- def Wi : BaseCondSetFlagsImm<op, GPR32, asm> {
- let Inst{31} = 0;
- }
- def Xi : BaseCondSetFlagsImm<op, GPR64, asm> {
- let Inst{31} = 1;
- }
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm>
- : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
- asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
+class BaseCondComparisonReg<bit op, RegisterClass regtype, string mnemonic,
+ SDNode OpNode>
+ : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond),
+ mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "",
+ [(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]>,
Sched<[WriteI, ReadI, ReadI]> {
let Uses = [NZCV];
let Defs = [NZCV];
@@ -2122,11 +2128,19 @@ class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm>
let Inst{3-0} = nzcv;
-multiclass CondSetFlagsReg<bit op, string asm> {
- def Wr : BaseCondSetFlagsReg<op, GPR32, asm> {
+multiclass CondComparison<bit op, string mnemonic, SDNode OpNode> {
+ // immediate operand variants
+ def Wi : BaseCondComparisonImm<op, GPR32, imm32_0_31, mnemonic, OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xi : BaseCondComparisonImm<op, GPR64, imm0_31, mnemonic, OpNode> {
+ let Inst{31} = 1;
+ }
+ // register operand variants
+ def Wr : BaseCondComparisonReg<op, GPR32, mnemonic, OpNode> {
let Inst{31} = 0;
- def Xr : BaseCondSetFlagsReg<op, GPR64, asm> {
+ def Xr : BaseCondComparisonReg<op, GPR64, mnemonic, OpNode> {
let Inst{31} = 1;
@@ -3934,11 +3948,14 @@ multiclass FPComparison<bit signalAllNans, string asm,
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseFPCondComparison<bit signalAllNans,
- RegisterClass regtype, string asm>
- : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
- asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
+class BaseFPCondComparison<bit signalAllNans, RegisterClass regtype,
+ string mnemonic, list<dag> pat>
+ : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond),
+ mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>,
Sched<[WriteFCmp]> {
+ let Uses = [NZCV];
+ let Defs = [NZCV];
bits<5> Rn;
bits<5> Rm;
bits<4> nzcv;
@@ -3954,16 +3971,18 @@ class BaseFPCondComparison<bit signalAllNans,
let Inst{3-0} = nzcv;
-multiclass FPCondComparison<bit signalAllNans, string asm> {
- let Defs = [NZCV], Uses = [NZCV] in {
- def Srr : BaseFPCondComparison<signalAllNans, FPR32, asm> {
+multiclass FPCondComparison<bit signalAllNans, string mnemonic,
+ SDPatternOperator OpNode = null_frag> {
+ def Srr : BaseFPCondComparison<signalAllNans, FPR32, mnemonic,
+ [(set NZCV, (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm), (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]> {
let Inst{22} = 0;
- def Drr : BaseFPCondComparison<signalAllNans, FPR64, asm> {
+ def Drr : BaseFPCondComparison<signalAllNans, FPR64, mnemonic,
+ [(set NZCV, (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm), (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]> {
let Inst{22} = 1;
- } // Defs = [NZCV], Uses = [NZCV]
@@ -8822,6 +8841,178 @@ class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode>
[(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>;
} // end of 'let Predicates = [HasCrypto]'
+// v8.1 atomic instructions extension:
+// * CAS
+// * CASP
+// * SWP
+// * LDOPregister<OP>, and aliases STOPregister<OP>
+// Instruction encodings:
+// 31 30|29 24|23|22|21|20 16|15|14 10|9 5|4 0
+// CAS SZ |001000|1 |A |1 |Rs |R |11111 |Rn |Rt
+// CASP 0|SZ|001000|0 |A |1 |Rs |R |11111 |Rn |Rt
+// SWP SZ |111000|A |R |1 |Rs |1 |OPC|00|Rn |Rt
+// LD SZ |111000|A |R |1 |Rs |0 |OPC|00|Rn |Rt
+// ST SZ |111000|A |R |1 |Rs |0 |OPC|00|Rn |11111
+// Instruction syntax:
+// CAS{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>]
+// CAS{<order>} <Xs>, <Xt>, [<Xn|SP>]
+// CASP{<order>} <Ws>, <W(s+1)>, <Wt>, <W(t+1)>, [<Xn|SP>]
+// CASP{<order>} <Xs>, <X(s+1)>, <Xt>, <X(t+1)>, [<Xn|SP>]
+// SWP{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>]
+// SWP{<order>} <Xs>, <Xt>, [<Xn|SP>]
+// LD<OP>{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>]
+// LD<OP>{<order>} <Xs>, <Xt>, [<Xn|SP>]
+// ST<OP>{<order>}[<size>] <Ws>, [<Xn|SP>]
+// ST<OP>{<order>} <Xs>, [<Xn|SP>]
+let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+class BaseCASEncoding<dag oops, dag iops, string asm, string operands,
+ string cstr, list<dag> pattern>
+ : I<oops, iops, asm, operands, cstr, pattern> {
+ bits<2> Sz;
+ bit NP;
+ bit Acq;
+ bit Rel;
+ bits<5> Rs;
+ bits<5> Rn;
+ bits<5> Rt;
+ let Inst{31-30} = Sz;
+ let Inst{29-24} = 0b001000;
+ let Inst{23} = NP;
+ let Inst{22} = Acq;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rs;
+ let Inst{15} = Rel;
+ let Inst{14-10} = 0b11111;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+class BaseCAS<string order, string size, RegisterClass RC>
+ : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn),
+ "cas" # order # size, "\t$Rs, $Rt, [$Rn]",
+ "$out = $Rs",[]> {
+ let NP = 1;
+multiclass CompareAndSwap<bits<1> Acq, bits<1> Rel, string order> {
+ let Sz = 0b00, Acq = Acq, Rel = Rel in def b : BaseCAS<order, "b", GPR32>;
+ let Sz = 0b01, Acq = Acq, Rel = Rel in def h : BaseCAS<order, "h", GPR32>;
+ let Sz = 0b10, Acq = Acq, Rel = Rel in def s : BaseCAS<order, "", GPR32>;
+ let Sz = 0b11, Acq = Acq, Rel = Rel in def d : BaseCAS<order, "", GPR64>;
+class BaseCASP<string order, string size, RegisterOperand RC>
+ : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn),
+ "casp" # order # size, "\t$Rs, $Rt, [$Rn]",
+ "$out = $Rs",[]> {
+ let NP = 0;
+multiclass CompareAndSwapPair<bits<1> Acq, bits<1> Rel, string order> {
+ let Sz = 0b00, Acq = Acq, Rel = Rel in
+ def s : BaseCASP<order, "", WSeqPairClassOperand>;
+ let Sz = 0b01, Acq = Acq, Rel = Rel in
+ def d : BaseCASP<order, "", XSeqPairClassOperand>;
+let Predicates = [HasV8_1a] in
+class BaseSWP<string order, string size, RegisterClass RC>
+ : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size,
+ "\t$Rs, $Rt, [$Rn]","",[]> {
+ bits<2> Sz;
+ bit Acq;
+ bit Rel;
+ bits<5> Rs;
+ bits<3> opc = 0b000;
+ bits<5> Rn;
+ bits<5> Rt;
+ let Inst{31-30} = Sz;
+ let Inst{29-24} = 0b111000;
+ let Inst{23} = Acq;
+ let Inst{22} = Rel;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rs;
+ let Inst{15} = 0b1;
+ let Inst{14-12} = opc;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+multiclass Swap<bits<1> Acq, bits<1> Rel, string order> {
+ let Sz = 0b00, Acq = Acq, Rel = Rel in def b : BaseSWP<order, "b", GPR32>;
+ let Sz = 0b01, Acq = Acq, Rel = Rel in def h : BaseSWP<order, "h", GPR32>;
+ let Sz = 0b10, Acq = Acq, Rel = Rel in def s : BaseSWP<order, "", GPR32>;
+ let Sz = 0b11, Acq = Acq, Rel = Rel in def d : BaseSWP<order, "", GPR64>;
+let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+class BaseLDOPregister<string op, string order, string size, RegisterClass RC>
+ : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size,
+ "\t$Rs, $Rt, [$Rn]","",[]> {
+ bits<2> Sz;
+ bit Acq;
+ bit Rel;
+ bits<5> Rs;
+ bits<3> opc;
+ bits<5> Rn;
+ bits<5> Rt;
+ let Inst{31-30} = Sz;
+ let Inst{29-24} = 0b111000;
+ let Inst{23} = Acq;
+ let Inst{22} = Rel;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rs;
+ let Inst{15} = 0b0;
+ let Inst{14-12} = opc;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+multiclass LDOPregister<bits<3> opc, string op, bits<1> Acq, bits<1> Rel,
+ string order> {
+ let Sz = 0b00, Acq = Acq, Rel = Rel, opc = opc in
+ def b : BaseLDOPregister<op, order, "b", GPR32>;
+ let Sz = 0b01, Acq = Acq, Rel = Rel, opc = opc in
+ def h : BaseLDOPregister<op, order, "h", GPR32>;
+ let Sz = 0b10, Acq = Acq, Rel = Rel, opc = opc in
+ def s : BaseLDOPregister<op, order, "", GPR32>;
+ let Sz = 0b11, Acq = Acq, Rel = Rel, opc = opc in
+ def d : BaseLDOPregister<op, order, "", GPR64>;
+let Predicates = [HasV8_1a] in
+class BaseSTOPregister<string asm, RegisterClass OP, Register Reg,
+ Instruction inst> :
+ InstAlias<asm # "\t$Rs, [$Rn]", (inst Reg, OP:$Rs, GPR64sp:$Rn)>;
+multiclass STOPregister<string asm, string instr> {
+ def : BaseSTOPregister<asm # "lb", GPR32, WZR,
+ !cast<Instruction>(instr # "Lb")>;
+ def : BaseSTOPregister<asm # "lh", GPR32, WZR,
+ !cast<Instruction>(instr # "Lh")>;
+ def : BaseSTOPregister<asm # "l", GPR32, WZR,
+ !cast<Instruction>(instr # "Ls")>;
+ def : BaseSTOPregister<asm # "l", GPR64, XZR,
+ !cast<Instruction>(instr # "Ld")>;
+ def : BaseSTOPregister<asm # "b", GPR32, WZR,
+ !cast<Instruction>(instr # "b")>;
+ def : BaseSTOPregister<asm # "h", GPR32, WZR,
+ !cast<Instruction>(instr # "h")>;
+ def : BaseSTOPregister<asm, GPR32, WZR,
+ !cast<Instruction>(instr # "s")>;
+ def : BaseSTOPregister<asm, GPR64, XZR,
+ !cast<Instruction>(instr # "d")>;
// Allow the size specifier tokens to be upper case, not just lower.
def : TokenAlias<".8B", ".8b">;
def : TokenAlias<".4H", ".4h">;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 207c34c..6941a6b 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2066,10 +2066,9 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
-MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- int FrameIndex) const {
+MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
// This is a bit of a hack. Consider this instruction:
// %vreg0<def> = COPY %SP; GPR64all:%vreg0
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index fa4b8b7..d296768 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -131,6 +131,7 @@ public:
using TargetInstrInfo::foldMemoryOperandImpl;
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
int FrameIndex) const override;
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index c7d6a69..2f1b893 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -66,6 +66,20 @@ def SDT_AArch64CSel : SDTypeProfile<1, 4,
SDTCisSameAs<0, 2>,
SDTCisVT<4, i32>]>;
+def SDT_AArch64CCMP : SDTypeProfile<1, 5,
+ [SDTCisVT<0, i32>,
+ SDTCisInt<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisInt<3>,
+ SDTCisInt<4>,
+ SDTCisVT<5, i32>]>;
+def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
+ [SDTCisVT<0, i32>,
+ SDTCisFP<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisInt<3>,
+ SDTCisInt<4>,
+ SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
SDTCisSameAs<0, 1>]>;
@@ -160,6 +174,10 @@ def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;
+def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
+def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
+def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;
def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
@@ -727,6 +745,74 @@ def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
+// v8.1 atomic CAS
+defm CAS : CompareAndSwap<0, 0, "">;
+defm CASA : CompareAndSwap<1, 0, "a">;
+defm CASL : CompareAndSwap<0, 1, "l">;
+defm CASAL : CompareAndSwap<1, 1, "al">;
+// v8.1 atomic CASP
+defm CASP : CompareAndSwapPair<0, 0, "">;
+defm CASPA : CompareAndSwapPair<1, 0, "a">;
+defm CASPL : CompareAndSwapPair<0, 1, "l">;
+defm CASPAL : CompareAndSwapPair<1, 1, "al">;
+// v8.1 atomic SWP
+defm SWP : Swap<0, 0, "">;
+defm SWPA : Swap<1, 0, "a">;
+defm SWPL : Swap<0, 1, "l">;
+defm SWPAL : Swap<1, 1, "al">;
+// v8.1 atomic LD<OP>(register). Performs load and then ST<OP>(register)
+defm LDADD : LDOPregister<0b000, "add", 0, 0, "">;
+defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">;
+defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">;
+defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">;
+defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">;
+defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">;
+defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">;
+defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">;
+defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">;
+defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">;
+defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">;
+defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">;
+defm LDSET : LDOPregister<0b011, "set", 0, 0, "">;
+defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">;
+defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">;
+defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">;
+defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">;
+defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">;
+defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">;
+defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;
+defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">;
+defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">;
+defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">;
+defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;
+defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">;
+defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">;
+defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">;
+defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;
+defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">;
+defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">;
+defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">;
+defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;
+// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR"
+defm : STOPregister<"stadd","LDADD">; // STADDx
+defm : STOPregister<"stclr","LDCLR">; // STCLRx
+defm : STOPregister<"steor","LDEOR">; // STEORx
+defm : STOPregister<"stset","LDSET">; // STSETx
+defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx
+defm : STOPregister<"stsmin","LDSMIN">;// STSMINx
+defm : STOPregister<"stumax","LDUMAX">;// STUMAXx
+defm : STOPregister<"stumin","LDUMIN">;// STUMINx
// Logical instructions.
@@ -950,13 +1036,10 @@ def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
-// Conditionally set flags instructions.
+// Conditional comparison instructions.
-defm CCMN : CondSetFlagsImm<0, "ccmn">;
-defm CCMP : CondSetFlagsImm<1, "ccmp">;
-defm CCMN : CondSetFlagsReg<0, "ccmn">;
-defm CCMP : CondSetFlagsReg<1, "ccmp">;
+defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
+defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
// Conditional select instructions.
@@ -2486,7 +2569,7 @@ defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>;
defm FCCMPE : FPCondComparison<1, "fccmpe">;
-defm FCCMP : FPCondComparison<0, "fccmp">;
+defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>;
// Floating point conditional select instruction.
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index 72edbf1..e55ae99 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -69,10 +69,10 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO,
RefKind = MCSymbolRefExpr::VK_PAGEOFF;
- const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
+ const MCExpr *Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
if (!MO.isJTI() && MO.getOffset())
- Expr = MCBinaryExpr::CreateAdd(
- Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx);
+ Expr = MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
return MCOperand::createExpr(Expr);
@@ -139,14 +139,14 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
RefFlags |= AArch64MCExpr::VK_NC;
const MCExpr *Expr =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx);
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx);
if (!MO.isJTI() && MO.getOffset())
- Expr = MCBinaryExpr::CreateAdd(
- Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx);
+ Expr = MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
AArch64MCExpr::VariantKind RefKind;
RefKind = static_cast<AArch64MCExpr::VariantKind>(RefFlags);
- Expr = AArch64MCExpr::Create(Expr, RefKind, Ctx);
+ Expr = AArch64MCExpr::create(Expr, RefKind, Ctx);
return MCOperand::createExpr(Expr);
@@ -179,7 +179,7 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO,
case MachineOperand::MO_MachineBasicBlock:
MCOp = MCOperand::createExpr(
- MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx));
+ MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
case MachineOperand::MO_GlobalAddress:
MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index d5ff3f1..b2efca0 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -26,8 +26,12 @@ let Namespace = "AArch64" in {
def hsub : SubRegIndex<16>;
def ssub : SubRegIndex<32>;
def dsub : SubRegIndex<32>;
+ def sube32 : SubRegIndex<32>;
+ def subo32 : SubRegIndex<32>;
def qhisub : SubRegIndex<64>;
def qsub : SubRegIndex<64>;
+ def sube64 : SubRegIndex<64>;
+ def subo64 : SubRegIndex<64>;
// Note: Code depends on these having consecutive numbers
def dsub0 : SubRegIndex<64>;
def dsub1 : SubRegIndex<64>;
@@ -592,3 +596,40 @@ def FPR16Op : RegisterOperand<FPR16, "printOperand">;
def FPR32Op : RegisterOperand<FPR32, "printOperand">;
def FPR64Op : RegisterOperand<FPR64, "printOperand">;
def FPR128Op : RegisterOperand<FPR128, "printOperand">;
+// ARMv8.1a atomic CASP register operands
+def WSeqPairs : RegisterTuples<[sube32, subo32],
+ [(rotl GPR32, 0), (rotl GPR32, 1)]>;
+def XSeqPairs : RegisterTuples<[sube64, subo64],
+ [(rotl GPR64, 0), (rotl GPR64, 1)]>;
+def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32,
+ (add WSeqPairs)>{
+ let Size = 64;
+def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64,
+ (add XSeqPairs)>{
+ let Size = 128;
+let RenderMethod = "addRegOperands", ParserMethod="tryParseGPRSeqPair" in {
+ def WSeqPairsAsmOperandClass : AsmOperandClass { let Name = "WSeqPair"; }
+ def XSeqPairsAsmOperandClass : AsmOperandClass { let Name = "XSeqPair"; }
+def WSeqPairClassOperand :
+ RegisterOperand<WSeqPairsClass, "printGPRSeqPairsClassOperand<32>"> {
+ let ParserMatchClass = WSeqPairsAsmOperandClass;
+def XSeqPairClassOperand :
+ RegisterOperand<XSeqPairsClass, "printGPRSeqPairsClassOperand<64>"> {
+ let ParserMatchClass = XSeqPairsAsmOperandClass;
+//===----- END: v8.1a atomic CASP register operands -----------------------===//
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index a9059ab..f23dd33 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -250,10 +250,14 @@ bool AArch64PassConfig::addPreISel() {
// FIXME: On AArch64, this depends on the type.
// Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
// and the offset has to be a multiple of the related size in bytes.
- if ((TM->getOptLevel() == CodeGenOpt::Aggressive &&
+ if ((TM->getOptLevel() != CodeGenOpt::None &&
EnableGlobalMerge == cl::BOU_UNSET) ||
- EnableGlobalMerge == cl::BOU_TRUE)
- addPass(createGlobalMergePass(TM, 4095));
+ EnableGlobalMerge == cl::BOU_TRUE) {
+ bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
+ (EnableGlobalMerge == cl::BOU_UNSET);
+ addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize));
+ }
if (TM->getOptLevel() != CodeGenOpt::None)
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 299b4a5..18ee4a9 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -40,11 +40,11 @@ const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference(
if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
const MCSymbol *Sym = TM.getSymbol(GV, Mang);
const MCExpr *Res =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
MCSymbol *PCSym = getContext().createTempSymbol();
- const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
- return MCBinaryExpr::CreateSub(Res, PC, getContext());
+ const MCExpr *PC = MCSymbolRefExpr::create(PCSym, getContext());
+ return MCBinaryExpr::createSub(Res, PC, getContext());
return TargetLoweringObjectFileMachO::getTTypeGlobalReference(
@@ -65,9 +65,9 @@ const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
// On ARM64 Darwin, we can reference symbols with foo@GOT-., which
// is an indirect pc-relative reference.
const MCExpr *Res =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
MCSymbol *PCSym = getContext().createTempSymbol();
- const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
- return MCBinaryExpr::CreateSub(Res, PC, getContext());
+ const MCExpr *PC = MCSymbolRefExpr::create(PCSym, getContext());
+ return MCBinaryExpr::createSub(Res, PC, getContext());
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 38d34e6..063c053 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -107,6 +107,7 @@ private:
OperandMatchResultTy tryParseAddSubImm(OperandVector &Operands);
OperandMatchResultTy tryParseGPR64sp0Operand(OperandVector &Operands);
bool tryParseVectorRegister(OperandVector &Operands);
+ OperandMatchResultTy tryParseGPRSeqPair(OperandVector &Operands);
enum AArch64MatchResultTy {
@@ -875,6 +876,16 @@ public:
return Kind == k_Register && !Reg.isVector &&
+ bool isWSeqPair() const {
+ return Kind == k_Register && !Reg.isVector &&
+ AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID].contains(
+ Reg.RegNum);
+ }
+ bool isXSeqPair() const {
+ return Kind == k_Register && !Reg.isVector &&
+ AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID].contains(
+ Reg.RegNum);
+ }
bool isGPR64sp0() const {
return Kind == k_Register && !Reg.isVector &&
@@ -1753,7 +1764,7 @@ static unsigned MatchRegisterName(StringRef Name);
/// }
static unsigned matchVectorRegName(StringRef Name) {
- return StringSwitch<unsigned>(Name)
+ return StringSwitch<unsigned>(Name.lower())
.Case("v0", AArch64::Q0)
.Case("v1", AArch64::Q1)
.Case("v2", AArch64::Q2)
@@ -2024,7 +2035,7 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
// No modifier was specified at all; this is the syntax for an ELF basic
// ADRP relocation (unfortunately).
Expr =
- AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_ABS_PAGE, getContext());
+ AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_PAGE, getContext());
} else if ((DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGE ||
DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGE) &&
Addend != 0) {
@@ -2157,7 +2168,7 @@ AArch64AsmParser::tryParseAddSubImm(OperandVector &Operands) {
if (MCE) {
int64_t Val = MCE->getValue();
if (Val > 0xfff && (Val & 0xfff) == 0) {
- Imm = MCConstantExpr::Create(Val >> 12, getContext());
+ Imm = MCConstantExpr::create(Val >> 12, getContext());
ShiftAmount = 12;
@@ -2347,14 +2358,14 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
#define SYS_ALIAS(op1, Cn, Cm, op2) \
do { \
- Expr = MCConstantExpr::Create(op1, getContext()); \
+ Expr = MCConstantExpr::create(op1, getContext()); \
Operands.push_back( \
AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
Operands.push_back( \
AArch64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \
Operands.push_back( \
AArch64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \
- Expr = MCConstantExpr::Create(op2, getContext()); \
+ Expr = MCConstantExpr::create(op2, getContext()); \
Operands.push_back( \
AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
} while (0)
@@ -2835,7 +2846,7 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
return true;
if (HasELFModifier)
- ImmVal = AArch64MCExpr::Create(ImmVal, RefKind, getContext());
+ ImmVal = AArch64MCExpr::create(ImmVal, RefKind, getContext());
return false;
@@ -3128,7 +3139,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
if (ShiftAmt <= MaxShiftAmt && Imm <= 0xFFFF) {
Operands[0] = AArch64Operand::CreateToken("movz", false, Loc, Ctx);
- MCConstantExpr::Create(Imm, Ctx), S, E, Ctx));
+ MCConstantExpr::create(Imm, Ctx), S, E, Ctx));
if (ShiftAmt)
ShiftAmt, true, S, E, Ctx));
@@ -3634,8 +3645,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
NewOp4Val = 63 - Op3Val;
- const MCExpr *NewOp3 = MCConstantExpr::Create(NewOp3Val, getContext());
- const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext());
+ const MCExpr *NewOp3 = MCConstantExpr::create(NewOp3Val, getContext());
+ const MCExpr *NewOp4 = MCConstantExpr::create(NewOp4Val, getContext());
Operands[0] = AArch64Operand::CreateToken(
"ubfm", false, Op.getStartLoc(), getContext());
@@ -3685,8 +3696,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(WidthOp.getStartLoc(),
"requested insert overflows register");
- const MCExpr *ImmRExpr = MCConstantExpr::Create(ImmR, getContext());
- const MCExpr *ImmSExpr = MCConstantExpr::Create(ImmS, getContext());
+ const MCExpr *ImmRExpr = MCConstantExpr::create(ImmR, getContext());
+ const MCExpr *ImmSExpr = MCConstantExpr::create(ImmS, getContext());
Operands[0] = AArch64Operand::CreateToken(
"bfm", false, Op.getStartLoc(), getContext());
Operands[2] = AArch64Operand::CreateReg(
@@ -3742,9 +3753,9 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
"requested insert overflows register");
const MCExpr *NewOp3 =
- MCConstantExpr::Create(NewOp3Val, getContext());
+ MCConstantExpr::create(NewOp3Val, getContext());
const MCExpr *NewOp4 =
- MCConstantExpr::Create(NewOp4Val, getContext());
+ MCConstantExpr::create(NewOp4Val, getContext());
Operands[3] = AArch64Operand::CreateImm(
NewOp3, Op3.getStartLoc(), Op3.getEndLoc(), getContext());
Operands[4] = AArch64Operand::CreateImm(
@@ -3800,7 +3811,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
"requested extract overflows register");
const MCExpr *NewOp4 =
- MCConstantExpr::Create(NewOp4Val, getContext());
+ MCConstantExpr::create(NewOp4Val, getContext());
Operands[4] = AArch64Operand::CreateImm(
NewOp4, Op4.getStartLoc(), Op4.getEndLoc(), getContext());
if (Tok == "bfxil")
@@ -4021,7 +4032,7 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".ltorg" || IDVal == ".pool")
return parseDirectiveLtorg(Loc);
if (IDVal == ".unreq")
- return parseDirectiveUnreq(DirectiveID.getLoc());
+ return parseDirectiveUnreq(Loc);
if (!IsMachO && !IsCOFF) {
if (IDVal == ".inst")
@@ -4106,8 +4117,8 @@ bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
return Error(L, "expected symbol after directive");
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
- const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext());
- Expr = AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_TLSDESC, getContext());
+ const MCExpr *Expr = MCSymbolRefExpr::create(Sym, getContext());
+ Expr = AArch64MCExpr::create(Expr, AArch64MCExpr::VK_TLSDESC, getContext());
MCInst Inst;
@@ -4354,3 +4365,77 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
return Match_Success;
return Match_InvalidOperand;
+AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ if (getParser().getTok().isNot(AsmToken::Identifier)) {
+ Error(S, "expected register");
+ return MatchOperand_ParseFail;
+ }
+ int FirstReg = tryParseRegister();
+ if (FirstReg == -1) {
+ return MatchOperand_ParseFail;
+ }
+ const MCRegisterClass &WRegClass =
+ AArch64MCRegisterClasses[AArch64::GPR32RegClassID];
+ const MCRegisterClass &XRegClass =
+ AArch64MCRegisterClasses[AArch64::GPR64RegClassID];
+ bool isXReg = XRegClass.contains(FirstReg),
+ isWReg = WRegClass.contains(FirstReg);
+ if (!isXReg && !isWReg) {
+ Error(S, "expected first even register of a "
+ "consecutive same-size even/odd register pair");
+ return MatchOperand_ParseFail;
+ }
+ const MCRegisterInfo *RI = getContext().getRegisterInfo();
+ unsigned FirstEncoding = RI->getEncodingValue(FirstReg);
+ if (FirstEncoding & 0x1) {
+ Error(S, "expected first even register of a "
+ "consecutive same-size even/odd register pair");
+ return MatchOperand_ParseFail;
+ }
+ SMLoc M = getLoc();
+ if (getParser().getTok().isNot(AsmToken::Comma)) {
+ Error(M, "expected comma");
+ return MatchOperand_ParseFail;
+ }
+ // Eat the comma
+ getParser().Lex();
+ SMLoc E = getLoc();
+ int SecondReg = tryParseRegister();
+ if (SecondReg ==-1) {
+ return MatchOperand_ParseFail;
+ }
+ if (RI->getEncodingValue(SecondReg) != FirstEncoding + 1 ||
+ (isXReg && !XRegClass.contains(SecondReg)) ||
+ (isWReg && !WRegClass.contains(SecondReg))) {
+ Error(E,"expected second odd register of a "
+ "consecutive same-size even/odd register pair");
+ return MatchOperand_ParseFail;
+ }
+ unsigned Pair = 0;
+ if(isXReg) {
+ Pair = RI->getMatchingSuperReg(FirstReg, AArch64::sube64,
+ &AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID]);
+ } else {
+ Pair = RI->getMatchingSuperReg(FirstReg, AArch64::sube32,
+ &AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID]);
+ }
+ Operands.push_back(AArch64Operand::CreateReg(Pair, false, S, getLoc(),
+ getContext()));
+ return MatchOperand_Success;
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index a1ed703..359c2e7 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -169,6 +169,14 @@ static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeWSeqPairsClassRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder);
+static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder);
static bool Check(DecodeStatus &Out, DecodeStatus In) {
switch (In) {
@@ -1543,3 +1551,35 @@ static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
return Success;
+static DecodeStatus DecodeGPRSeqPairsClassRegisterClass(MCInst &Inst,
+ unsigned RegClassID,
+ unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder) {
+ // Register number must be even (see CASP instruction)
+ if (RegNo & 0x1)
+ return Fail;
+ unsigned Register = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo);
+ Inst.addOperand(MCOperand::createReg(Register));
+ return Success;
+static DecodeStatus DecodeWSeqPairsClassRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder) {
+ return DecodeGPRSeqPairsClassRegisterClass(Inst,
+ AArch64::WSeqPairsClassRegClassID,
+ RegNo, Addr, Decoder);
+static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder) {
+ return DecodeGPRSeqPairsClassRegisterClass(Inst,
+ AArch64::XSeqPairsClassRegClassID,
+ RegNo, Addr, Decoder);
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 07e4a45..eb05ed9 100644
--- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -168,11 +168,11 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
if (Variant != MCSymbolRefExpr::VK_None)
- Add = MCSymbolRefExpr::Create(Sym, Variant, Ctx);
+ Add = MCSymbolRefExpr::create(Sym, Variant, Ctx);
- Add = MCSymbolRefExpr::Create(Sym, Ctx);
+ Add = MCSymbolRefExpr::create(Sym, Ctx);
} else {
- Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, Ctx);
+ Add = MCConstantExpr::create(SymbolicOp.AddSymbol.Value, Ctx);
@@ -181,37 +181,37 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
if (SymbolicOp.SubtractSymbol.Name) {
StringRef Name(SymbolicOp.SubtractSymbol.Name);
MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
- Sub = MCSymbolRefExpr::Create(Sym, Ctx);
+ Sub = MCSymbolRefExpr::create(Sym, Ctx);
} else {
- Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, Ctx);
+ Sub = MCConstantExpr::create(SymbolicOp.SubtractSymbol.Value, Ctx);
const MCExpr *Off = nullptr;
if (SymbolicOp.Value != 0)
- Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx);
+ Off = MCConstantExpr::create(SymbolicOp.Value, Ctx);
const MCExpr *Expr;
if (Sub) {
const MCExpr *LHS;
if (Add)
- LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx);
+ LHS = MCBinaryExpr::createSub(Add, Sub, Ctx);
- LHS = MCUnaryExpr::CreateMinus(Sub, Ctx);
+ LHS = MCUnaryExpr::createMinus(Sub, Ctx);
if (Off)
- Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx);
+ Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx);
Expr = LHS;
} else if (Add) {
if (Off)
- Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx);
+ Expr = MCBinaryExpr::createAdd(Add, Off, Ctx);
Expr = Add;
} else {
if (Off)
Expr = Off;
- Expr = MCConstantExpr::Create(0, Ctx);
+ Expr = MCConstantExpr::create(0, Ctx);
diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
index 62827e8..73665eb 100644
--- a/lib/Target/AArch64/Disassembler/LLVMBuild.txt
+++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = AArch64Disassembler
parent = AArch64
-required_libraries = AArch64Info AArch64Utils MC MCDisassembler Support
+required_libraries = AArch64Desc AArch64Info AArch64Utils MC MCDisassembler Support
add_to_library_groups = AArch64
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 02bd929..96fbe3a 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -206,15 +206,15 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
O << "\tmovn\t";
- O << getRegisterName(MI->getOperand(0).getReg()) << ", #"
- << *MI->getOperand(1).getExpr();
+ O << getRegisterName(MI->getOperand(0).getReg()) << ", #";
+ MI->getOperand(1).getExpr()->print(O, &MAI);
if ((Opcode == AArch64::MOVKXi || Opcode == AArch64::MOVKWi) &&
MI->getOperand(2).isExpr()) {
- O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"
- << *MI->getOperand(2).getExpr();
+ O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #";
+ MI->getOperand(2).getExpr()->print(O, &MAI);
@@ -908,7 +908,7 @@ void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << '#' << Op.getImm();
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << *Op.getExpr();
+ Op.getExpr()->print(O, &MAI);
@@ -966,7 +966,7 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
*CommentStream << '=' << (Val << Shift) << '\n';
} else {
assert(MO.isExpr() && "Unexpected operand type!");
- O << *MO.getExpr();
+ MO.getExpr()->print(O, &MAI);
printShifter(MI, OpNum + 1, STI, O);
@@ -1091,7 +1091,7 @@ void AArch64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum,
O << "#" << (MO.getImm() * Scale);
} else {
assert(MO.isExpr() && "Unexpected operand type!");
- O << *MO.getExpr();
+ MO.getExpr()->print(O, &MAI);
@@ -1103,7 +1103,8 @@ void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum,
O << ", #" << (MO1.getImm() * Scale);
} else {
assert(MO1.isExpr() && "Unexpected operand type!");
- O << ", " << *MO1.getExpr();
+ O << ", ";
+ MO1.getExpr()->print(O, &MAI);
O << ']';
@@ -1113,7 +1114,7 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned prfop = MI->getOperand(OpNum).getImm();
bool Valid;
- StringRef Name =
+ StringRef Name =
AArch64PRFM::PRFMMapper().toString(prfop, STI.getFeatureBits(), Valid);
if (Valid)
O << Name;
@@ -1177,6 +1178,23 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
return Reg;
+template<unsigned size>
+void AArch64InstPrinter::printGPRSeqPairsClassOperand(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ static_assert(size == 64 || size == 32,
+ "Template parameter must be either 32 or 64");
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Sube = (size == 32) ? AArch64::sube32 : AArch64::sube64;
+ unsigned Subo = (size == 32) ? AArch64::subo32 : AArch64::subo64;
+ unsigned Even = MRI.getSubReg(Reg, Sube);
+ unsigned Odd = MRI.getSubReg(Reg, Subo);
+ O << getRegisterName(Even) << ", " << getRegisterName(Odd);
void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O,
@@ -1264,12 +1282,12 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
const MCConstantExpr *BranchTarget =
int64_t Address;
- if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) {
O << "0x";
} else {
// Otherwise, just print the expression.
- O << *MI->getOperand(OpNum).getExpr();
+ MI->getOperand(OpNum).getExpr()->print(O, &MAI);
@@ -1286,7 +1304,7 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
// Otherwise, just print the expression.
- O << *MI->getOperand(OpNum).getExpr();
+ MI->getOperand(OpNum).getExpr()->print(O, &MAI);
void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
@@ -1298,10 +1316,10 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
bool Valid;
StringRef Name;
if (Opcode == AArch64::ISB)
- Name = AArch64ISB::ISBMapper().toString(Val, STI.getFeatureBits(),
+ Name = AArch64ISB::ISBMapper().toString(Val, STI.getFeatureBits(),
- Name = AArch64DB::DBarrierMapper().toString(Val, STI.getFeatureBits(),
+ Name = AArch64DB::DBarrierMapper().toString(Val, STI.getFeatureBits(),
if (Valid)
O << Name;
@@ -1337,7 +1355,7 @@ void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
unsigned Val = MI->getOperand(OpNo).getImm();
bool Valid;
- StringRef Name =
+ StringRef Name =
AArch64PState::PStateMapper().toString(Val, STI.getFeatureBits(), Valid);
if (Valid)
O << StringRef(Name.str()).upper();
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index c2077a0..15dee97 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -153,6 +153,10 @@ protected:
const MCSubtargetInfo &STI, raw_ostream &O);
void printSIMDType10Operand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ template<unsigned size>
+ void printGPRSeqPairsClassOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
class AArch64AppleInstPrinter : public AArch64InstPrinter {
diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt
index 573fa10..642c183 100644
--- a/lib/Target/AArch64/LLVMBuild.txt
+++ b/lib/Target/AArch64/LLVMBuild.txt
@@ -31,5 +31,5 @@ has_jit = 1
type = Library
name = AArch64CodeGen
parent = AArch64
-required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target
+required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AArch64Utils Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target
add_to_library_groups = AArch64
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 31fceb6..6c15bf3 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -252,7 +252,7 @@ bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
// We are properly aligned, so write NOPs as requested.
Count /= 4;
for (uint64_t i = 0; i != Count; ++i)
- OW->Write32(0xd503201f);
+ OW->write32(0xd503201f);
return true;
@@ -496,7 +496,7 @@ void ELFAArch64AsmBackend::processFixupValue(
// FIXME: Should be replaced with something more principled.
static bool isByteSwappedFixup(const MCExpr *E) {
MCValue Val;
- if (!E->EvaluateAsRelocatable(Val, nullptr, nullptr))
+ if (!E->evaluateAsRelocatable(Val, nullptr, nullptr))
return false;
if (!Val.getSymA() || Val.getSymA()->getSymbol().isUndefined())
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 204a1ab..78837de 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -23,16 +23,14 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFStreamer.h"
-#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
@@ -161,18 +159,18 @@ private:
MCSymbol *Start = getContext().createTempSymbol();
- MCSymbol *Symbol = getContext().getOrCreateSymbol(
- Name + "." + Twine(MappingSymbolCounter++));
+ auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
+ Name + "." + Twine(MappingSymbolCounter++)));
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- MCELF::SetBinding(SD, ELF::STB_LOCAL);
- SD.setExternal(false);
+ getAssembler().registerSymbol(*Symbol);
+ Symbol->setType(ELF::STT_NOTYPE);
+ Symbol->setBinding(ELF::STB_LOCAL);
+ Symbol->setExternal(false);
auto Sec = getCurrentSection().first;
assert(Sec && "need a section");
- const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
+ const MCExpr *Value = MCSymbolRefExpr::create(Start, getContext());
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index ab2cad6..921c4b9 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -62,15 +62,14 @@ const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol(
// version.
MCContext &Context = Streamer.getContext();
const MCExpr *Res =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Context);
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, Context);
MCSymbol *PCSym = Context.createTempSymbol();
- const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
- return MCBinaryExpr::CreateSub(Res, PC, Context);
+ const MCExpr *PC = MCSymbolRefExpr::create(PCSym, Context);
+ return MCBinaryExpr::createSub(Res, PC, Context);
-AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) {
- Triple T(TT);
+AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
if (T.getArch() == Triple::aarch64_be)
IsLittleEndian = false;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 9b88de7..253cd30 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -18,9 +18,10 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
-class Target;
-class StringRef;
class MCStreamer;
+class Target;
+class Triple;
struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin {
explicit AArch64MCAsmInfoDarwin();
const MCExpr *
@@ -29,7 +30,7 @@ struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin {
struct AArch64MCAsmInfoELF : public MCAsmInfoELF {
- explicit AArch64MCAsmInfoELF(StringRef TT);
+ explicit AArch64MCAsmInfoELF(const Triple &T);
} // namespace llvm
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 277ea9f..7d8e79b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -174,16 +175,6 @@ public:
unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue,
const MCSubtargetInfo &STI) const;
- void EmitByte(unsigned char C, raw_ostream &OS) const { OS << (char)C; }
- void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const {
- // Output the constant in little endian byte order.
- for (unsigned i = 0; i != Size; ++i) {
- EmitByte(Val & 255, OS);
- Val >>= 8;
- }
- }
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -611,7 +602,7 @@ void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
- EmitConstant(Binary, 4, OS);
+ support::endian::Writer<support::little>(OS).write<uint32_t>(Binary);
++MCNumEmitted; // Keep track of the # of mi's emitted.
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 74b81af..2870341 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -15,9 +15,8 @@
#include "AArch64MCExpr.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -26,7 +25,7 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64symbolrefexpr"
-const AArch64MCExpr *AArch64MCExpr::Create(const MCExpr *Expr, VariantKind Kind,
+const AArch64MCExpr *AArch64MCExpr::create(const MCExpr *Expr, VariantKind Kind,
MCContext &Ctx) {
return new (Ctx) AArch64MCExpr(Expr, Kind);
@@ -76,24 +75,24 @@ StringRef AArch64MCExpr::getVariantKindName() const {
-void AArch64MCExpr::PrintImpl(raw_ostream &OS) const {
+void AArch64MCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
if (getKind() != VK_NONE)
OS << getVariantKindName();
- OS << *Expr;
+ Expr->print(OS, MAI);
void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const {
-MCSection *AArch64MCExpr::FindAssociatedSection() const {
+MCSection *AArch64MCExpr::findAssociatedSection() const {
llvm_unreachable("FIXME: what goes here?");
-bool AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+bool AArch64MCExpr::evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const {
- if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup))
+ if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup))
return false;
Res =
@@ -121,8 +120,7 @@ static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
// We're known to be under a TLS fixup, so any symbol should be
// modified. There should be only one.
const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
- MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol());
+ cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index 95d2277..1165314 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -112,7 +112,7 @@ public:
/// @name Construction
/// @{
- static const AArch64MCExpr *Create(const MCExpr *Expr, VariantKind Kind,
+ static const AArch64MCExpr *create(const MCExpr *Expr, VariantKind Kind,
MCContext &Ctx);
/// @}
@@ -145,13 +145,13 @@ public:
/// (e.g. ":got:", ":lo12:").
StringRef getVariantKindName() const;
- void PrintImpl(raw_ostream &OS) const override;
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *FindAssociatedSection() const override;
+ MCSection *findAssociatedSection() const override;
- bool EvaluateAsRelocatableImpl(MCValue &Res,
+ bool evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 2e22de0..f89a852 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -58,15 +58,13 @@ static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) {
static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
- StringRef TT) {
- Triple TheTriple(TT);
+ const Triple &TheTriple) {
MCAsmInfo *MAI;
if (TheTriple.isOSDarwin())
MAI = new AArch64MCAsmInfoDarwin();
else {
assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF");
- MAI = new AArch64MCAsmInfoELF(TT);
+ MAI = new AArch64MCAsmInfoELF(TheTriple);
// Initial state of the frame pointer is SP.
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index d425975..67af810 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -31,10 +31,9 @@ class AArch64MachObjectWriter : public MCMachObjectTargetWriter {
AArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype)
- : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype,
- /*UseAggressiveSymbolFolding=*/true) {}
+ : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype) {}
- void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+ void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
@@ -140,7 +139,7 @@ static bool canUseLocalRelocation(const MCSectionMachO &Section,
return false;
-void AArch64MachObjectWriter::RecordRelocation(
+void AArch64MachObjectWriter::recordRelocation(
MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
@@ -209,11 +208,9 @@ void AArch64MachObjectWriter::RecordRelocation(
} else if (Target.getSymB()) { // A - B + constant
const MCSymbol *A = &Target.getSymA()->getSymbol();
- const MCSymbolData &A_SD = Asm.getSymbolData(*A);
const MCSymbol *A_Base = Asm.getAtom(*A);
const MCSymbol *B = &Target.getSymB()->getSymbol();
- const MCSymbolData &B_SD = Asm.getSymbolData(*B);
const MCSymbol *B_Base = Asm.getAtom(*B);
// Check for "_foo@got - .", which comes through here as:
@@ -264,14 +261,12 @@ void AArch64MachObjectWriter::RecordRelocation(
"unsupported relocation with identical base");
- Value += (!A_SD.getFragment() ? 0 : Writer->getSymbolAddress(*A, Layout)) -
- (!A_Base || !A_Base->getData().getFragment()
- ? 0
- : Writer->getSymbolAddress(*A_Base, Layout));
- Value -= (!B_SD.getFragment() ? 0 : Writer->getSymbolAddress(*B, Layout)) -
- (!B_Base || !B_Base->getData().getFragment()
- ? 0
- : Writer->getSymbolAddress(*B_Base, Layout));
+ Value += (!A->getFragment() ? 0 : Writer->getSymbolAddress(*A, Layout)) -
+ (!A_Base || !A_Base->getFragment() ? 0 : Writer->getSymbolAddress(
+ *A_Base, Layout));
+ Value -= (!B->getFragment() ? 0 : Writer->getSymbolAddress(*B, Layout)) -
+ (!B_Base || !B_Base->getFragment() ? 0 : Writer->getSymbolAddress(
+ *B_Base, Layout));
@@ -304,7 +299,7 @@ void AArch64MachObjectWriter::RecordRelocation(
// If the evaluation is an absolute value, just use that directly
// to keep things easy.
int64_t Res;
- if (Symbol->getVariableValue()->EvaluateAsAbsolute(
+ if (Symbol->getVariableValue()->evaluateAsAbsolute(
Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
@@ -313,12 +308,12 @@ void AArch64MachObjectWriter::RecordRelocation(
// FIXME: Will the Target we already have ever have any data in it
// we need to preserve and merge with the new Target? How about
// the FixedValue?
- if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout,
+ if (!Symbol->getVariableValue()->evaluateAsRelocatable(Target, &Layout,
"unable to resolve variable '" +
Symbol->getName() + "'");
- return RecordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ return recordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
@@ -360,7 +355,7 @@ void AArch64MachObjectWriter::RecordRelocation(
// Resolve constant variables.
if (Symbol->isVariable()) {
int64_t Res;
- if (Symbol->getVariableValue()->EvaluateAsAbsolute(
+ if (Symbol->getVariableValue()->evaluateAsAbsolute(
Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 28b8e7e..ee85b65b 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -175,6 +175,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = {
{"id_mmfr1_el1", ID_MMFR1_EL1, {}},
{"id_mmfr2_el1", ID_MMFR2_EL1, {}},
{"id_mmfr3_el1", ID_MMFR3_EL1, {}},
+ {"id_mmfr4_el1", ID_MMFR4_EL1, {}},
{"id_isar0_el1", ID_ISAR0_EL1, {}},
{"id_isar1_el1", ID_ISAR1_EL1, {}},
{"id_isar2_el1", ID_ISAR2_EL1, {}},
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 7125f14..7e42f8e 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -603,6 +603,7 @@ namespace AArch64SysReg {
ISR_EL1 = 0xc608, // 11 000 1100 0001 000
CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001
CNTVCT_EL0 = 0xdf02, // 11 011 1110 0000 010
+ ID_MMFR4_EL1 = 0xc016, // 11 000 0000 0010 110
// Trace registers
TRCSTATR = 0x8818, // 10 001 0000 0011 000
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index d3cc068..9550a3a 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -16,11 +16,13 @@
#include "llvm/Support/CodeGen.h"
+#include <functional>
namespace llvm {
class ARMAsmPrinter;
class ARMBaseTargetMachine;
+class Function;
class FunctionPass;
class ImmutablePass;
class MachineInstr;
@@ -38,7 +40,8 @@ FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createARMOptimizeBarriersPass();
-FunctionPass *createThumb2SizeReductionPass();
+FunctionPass *createThumb2SizeReductionPass(
+ std::function<bool(const Function &)> Ftor = nullptr);
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 04503b8..d84f296 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -87,7 +87,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
assert(GV && "C++ constructor pointer was not a GlobalValue!");
- const MCExpr *E = MCSymbolRefExpr::Create(GetARMGVSymbol(GV,
+ const MCExpr *E = MCSymbolRefExpr::create(GetARMGVSymbol(GV,
? MCSymbolRefExpr::VK_ARM_TARGET1
@@ -173,7 +173,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
+ MO.getMBB()->getSymbol()->print(O, MAI);
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
@@ -181,7 +181,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
O << ":lower16:";
else if (TF & ARMII::MO_HI16)
O << ":upper16:";
- O << *GetARMGVSymbol(GV, TF);
+ GetARMGVSymbol(GV, TF)->print(O, MAI);
printOffset(MO.getOffset(), O);
if (TF == ARMII::MO_PLT)
@@ -189,7 +189,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
case MachineOperand::MO_ConstantPoolIndex:
- O << *GetCPISymbol(MO.getIndex());
+ GetCPISymbol(MO.getIndex())->print(O, MAI);
@@ -467,7 +467,7 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
// using NLPs; however, sometimes the types are local to the file.
// We need to fill in the value for the NLP in those cases.
- MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()),
+ MCSymbolRefExpr::create(MCSym.getPointer(), OutStreamer.getContext()),
4 /*size*/);
@@ -640,9 +640,13 @@ void ARMAsmPrinter::emitAttributes() {
if (STI.hasFPARMv8())
// FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
// FPU, but there are two different names for it depending on the CPU.
- ATS.emitFPU(STI.hasD16() ? ARM::FK_FPV5_D16 : ARM::FK_FP_ARMV8);
+ ATS.emitFPU(STI.hasD16()
+ ? (STI.isFPOnlySP() ? ARM::FK_FPV5_SP_D16 : ARM::FK_FPV5_D16)
+ : ARM::FK_FP_ARMV8);
else if (STI.hasVFP4())
- ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV4_D16 : ARM::FK_VFPV4);
+ ATS.emitFPU(STI.hasD16()
+ ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16)
+ : ARM::FK_VFPV4);
else if (STI.hasVFP3())
ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV3_D16 : ARM::FK_VFPV3);
else if (STI.hasVFP2())
@@ -895,7 +899,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
// Create an MCSymbol for the reference.
const MCExpr *Expr =
- MCSymbolRefExpr::Create(MCSym, getModifierVariantKind(ACPV->getModifier()),
+ MCSymbolRefExpr::create(MCSym, getModifierVariantKind(ACPV->getModifier()),
if (ACPV->getPCAdjustment()) {
@@ -903,10 +907,10 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
- const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, OutContext);
+ const MCExpr *PCRelExpr = MCSymbolRefExpr::create(PCLabel, OutContext);
PCRelExpr =
- MCBinaryExpr::CreateAdd(PCRelExpr,
- MCConstantExpr::Create(ACPV->getPCAdjustment(),
+ MCBinaryExpr::createAdd(PCRelExpr,
+ MCConstantExpr::create(ACPV->getPCAdjustment(),
if (ACPV->mustAddCurrentAddress()) {
@@ -914,25 +918,22 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
// label, so just emit a local label end reference that instead.
MCSymbol *DotSym = OutContext.createTempSymbol();
- const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
- PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext);
+ const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
+ PCRelExpr = MCBinaryExpr::createSub(PCRelExpr, DotExpr, OutContext);
- Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext);
+ Expr = MCBinaryExpr::createSub(Expr, PCRelExpr, OutContext);
OutStreamer->EmitValue(Expr, Size);
-void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
- unsigned Opcode = MI->getOpcode();
- int OpNum = 1;
- if (Opcode == ARM::BR_JTadd)
- OpNum = 2;
- else if (Opcode == ARM::BR_JTm)
- OpNum = 3;
- const MachineOperand &MO1 = MI->getOperand(OpNum);
+void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) {
+ const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
+ // Make sure the Thumb jump table is 4-byte aligned. This will be a nop for
+ // ARM mode tables.
+ EmitAlignment(2);
// Emit a label for the jump table.
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
@@ -955,16 +956,16 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
// LJTI_0_0:
// .word (LBB0 - LJTI_0_0)
// .word (LBB1 - LJTI_0_0)
- const MCExpr *Expr = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ const MCExpr *Expr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
if (TM.getRelocationModel() == Reloc::PIC_)
- Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol,
+ Expr = MCBinaryExpr::createSub(Expr, MCSymbolRefExpr::create(JTISymbol,
// If we're generating a table of Thumb addresses in static relocation
// model, we need to add one to keep interworking correctly.
else if (AFI->isThumbFunction())
- Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(1,OutContext),
+ Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(1,OutContext),
OutStreamer->EmitValue(Expr, 4);
@@ -972,10 +973,8 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
-void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
- unsigned Opcode = MI->getOpcode();
- int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1;
- const MachineOperand &MO1 = MI->getOperand(OpNum);
+void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) {
+ const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
@@ -985,51 +984,67 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
- unsigned OffsetWidth = 4;
- if (MI->getOpcode() == ARM::t2TBB_JT) {
- OffsetWidth = 1;
- // Mark the jump table as data-in-code.
- OutStreamer->EmitDataRegion(MCDR_DataRegionJT8);
- } else if (MI->getOpcode() == ARM::t2TBH_JT) {
- OffsetWidth = 2;
- // Mark the jump table as data-in-code.
- OutStreamer->EmitDataRegion(MCDR_DataRegionJT16);
- }
for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
MachineBasicBlock *MBB = JTBBs[i];
- const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
+ const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::create(MBB->getSymbol(),
// If this isn't a TBB or TBH, the entries are direct branch instructions.
- if (OffsetWidth == 4) {
- EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2B)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2B)
- continue;
- }
+ }
+void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
+ unsigned OffsetWidth) {
+ assert((OffsetWidth == 1 || OffsetWidth == 2) && "invalid tbb/tbh width");
+ const MachineOperand &MO1 = MI->getOperand(1);
+ unsigned JTI = MO1.getIndex();
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
+ OutStreamer->EmitLabel(JTISymbol);
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ // Mark the jump table as data-in-code.
+ OutStreamer->EmitDataRegion(OffsetWidth == 1 ? MCDR_DataRegionJT8
+ : MCDR_DataRegionJT16);
+ for (auto MBB : JTBBs) {
+ const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::create(MBB->getSymbol(),
+ OutContext);
// Otherwise it's an offset from the dispatch instruction. Construct an
// MCExpr for the entry. We want a value of the form:
- // (BasicBlockAddr - TableBeginAddr) / 2
+ // (BasicBlockAddr - TBBInstAddr + 4) / 2
// For example, a TBB table with entries jumping to basic blocks BB0 and BB1
// would look like:
// LJTI_0_0:
- // .byte (LBB0 - LJTI_0_0) / 2
- // .byte (LBB1 - LJTI_0_0) / 2
- const MCExpr *Expr =
- MCBinaryExpr::CreateSub(MBBSymbolExpr,
- MCSymbolRefExpr::Create(JTISymbol, OutContext),
- OutContext);
- Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext),
+ // .byte (LBB0 - (LCPI0_0 + 4)) / 2
+ // .byte (LBB1 - (LCPI0_0 + 4)) / 2
+ // where LCPI0_0 is a label defined just before the TBB instruction using
+ // this table.
+ MCSymbol *TBInstPC = GetCPISymbol(MI->getOperand(0).getImm());
+ const MCExpr *Expr = MCBinaryExpr::createAdd(
+ MCSymbolRefExpr::create(TBInstPC, OutContext),
+ MCConstantExpr::create(4, OutContext), OutContext);
+ Expr = MCBinaryExpr::createSub(MBBSymbolExpr, Expr, OutContext);
+ Expr = MCBinaryExpr::createDiv(Expr, MCConstantExpr::create(2, OutContext),
OutStreamer->EmitValue(Expr, OffsetWidth);
// Mark the end of jump table data-in-code region. 32-bit offsets use
// actual branch instructions here, so we don't mark those as a data-region
// at all.
- if (OffsetWidth != 4)
- OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+ // Make sure the next instruction is 2-byte aligned.
+ EmitAlignment(1);
void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
@@ -1212,7 +1227,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
: (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
: ARM::ADR))
- .addExpr(MCSymbolRefExpr::Create(CPISymbol, OutContext))
+ .addExpr(MCSymbolRefExpr::create(CPISymbol, OutContext))
// Add predicate operands.
@@ -1228,7 +1243,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
: (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
: ARM::ADR))
- .addExpr(MCSymbolRefExpr::Create(JTIPICSymbol, OutContext))
+ .addExpr(MCSymbolRefExpr::create(JTIPICSymbol, OutContext))
// Add predicate operands.
@@ -1278,7 +1293,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBL)
// Predicate comes first here.
- .addExpr(MCSymbolRefExpr::Create(TRegSym, OutContext)));
+ .addExpr(MCSymbolRefExpr::create(TRegSym, OutContext)));
@@ -1315,7 +1330,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalValue *GV = Op.getGlobal();
const unsigned TF = Op.getTargetFlags();
MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
- const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc)
// Add predicate operands.
@@ -1332,17 +1347,17 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned TF = MI->getOperand(1).getTargetFlags();
const GlobalValue *GV = MI->getOperand(1).getGlobal();
MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
- const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(),
MI->getOperand(2).getImm(), OutContext);
- const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext);
unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4;
const MCExpr *PCRelExpr =
- ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr,
- MCBinaryExpr::CreateAdd(LabelSymExpr,
- MCConstantExpr::Create(PCAdj, OutContext),
+ ARMMCExpr::createLower16(MCBinaryExpr::createSub(GVSymExpr,
+ MCBinaryExpr::createAdd(LabelSymExpr,
+ MCConstantExpr::create(PCAdj, OutContext),
OutContext), OutContext), OutContext);
@@ -1365,17 +1380,17 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned TF = MI->getOperand(2).getTargetFlags();
const GlobalValue *GV = MI->getOperand(2).getGlobal();
MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
- const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(),
MI->getOperand(3).getImm(), OutContext);
- const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext);
unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4;
const MCExpr *PCRelExpr =
- ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr,
- MCBinaryExpr::CreateAdd(LabelSymExpr,
- MCConstantExpr::Create(PCAdj, OutContext),
+ ARMMCExpr::createUpper16(MCBinaryExpr::createSub(GVSymExpr,
+ MCBinaryExpr::createAdd(LabelSymExpr,
+ MCConstantExpr::create(PCAdj, OutContext),
OutContext), OutContext), OutContext);
// Add predicate operands.
@@ -1501,6 +1516,16 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ EmitJumpTableAddrs(MI);
+ return;
+ EmitJumpTableInsts(MI);
+ return;
+ EmitJumpTableTBInst(MI, MI->getOpcode() == ARM::JUMPTABLE_TBB ? 1 : 2);
+ return;
case ARM::t2BR_JT: {
// Lower and emit the instruction itself, then the jump table following it.
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
@@ -1509,37 +1534,19 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Add predicate operands.
- // Output the data for the jump table itself
- EmitJump2Table(MI);
- return;
- }
- case ARM::t2TBB_JT: {
- // Lower and emit the instruction itself, then the jump table following it.
- EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2TBB)
- .addReg(ARM::PC)
- .addReg(MI->getOperand(0).getReg())
- // Add predicate operands.
- .addImm(ARMCC::AL)
- .addReg(0));
- // Output the data for the jump table itself
- EmitJump2Table(MI);
- // Make sure the next instruction is 2-byte aligned.
- EmitAlignment(1);
+ case ARM::t2TBB_JT:
case ARM::t2TBH_JT: {
- // Lower and emit the instruction itself, then the jump table following it.
- EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2TBH)
- .addReg(ARM::PC)
- .addReg(MI->getOperand(0).getReg())
- // Add predicate operands.
- .addImm(ARMCC::AL)
- .addReg(0));
- // Output the data for the jump table itself
- EmitJump2Table(MI);
+ unsigned Opc = MI->getOpcode() == ARM::t2TBB_JT ? ARM::t2TBB : ARM::t2TBH;
+ // Lower and emit the PC label, then the instruction itself.
+ OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
case ARM::tBR_JTr:
@@ -1559,13 +1566,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (Opc == ARM::MOVr)
EmitToStreamer(*OutStreamer, TmpInst);
- // Make sure the Thumb jump table is 4-byte aligned.
- if (Opc == ARM::tMOVr)
- EmitAlignment(2);
- // Output the data for the jump table itself
- EmitJumpTable(MI);
case ARM::BR_JTm: {
@@ -1589,9 +1589,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
- // Output the data for the jump table itself
- EmitJumpTable(MI);
case ARM::BR_JTadd: {
@@ -1606,9 +1603,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Add 's' bit operand (always reg0 for this)
- // Output the data for the jump table itself
- EmitJumpTable(MI);
case ARM::SPACE:
@@ -1695,7 +1689,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::create(Label, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tB)
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 7bfb944..a6bc368 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -71,8 +71,9 @@ public:
void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
const MCSubtargetInfo *EndInfo) const override;
- void EmitJumpTable(const MachineInstr *MI);
- void EmitJump2Table(const MachineInstr *MI);
+ void EmitJumpTableAddrs(const MachineInstr *MI);
+ void EmitJumpTableInsts(const MachineInstr *MI);
+ void EmitJumpTableTBInst(const MachineInstr *MI, unsigned OffsetWidth);
void EmitInstruction(const MachineInstr *MI) override;
bool runOnMachineFunction(MachineFunction &F) override;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c5d6b25..9c4b496 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -627,6 +627,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case ARM::t2MOVi32imm:
return 8;
// If this machine instr is a constant pool entry, its size is recorded as
// operand #2.
return MI->getOperand(2).getImm();
@@ -641,42 +645,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case ARM::t2Int_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp_nofp:
return 12;
- case ARM::BR_JTr:
- case ARM::BR_JTm:
- case ARM::BR_JTadd:
- case ARM::tBR_JTr:
- case ARM::t2BR_JT:
- case ARM::t2TBB_JT:
- case ARM::t2TBH_JT: {
- // These are jumptable branches, i.e. a branch followed by an inlined
- // jumptable. The size is 4 + 4 * number of entries. For TBB, each
- // entry is one byte; TBH two byte each.
- unsigned EntrySize = (Opc == ARM::t2TBB_JT)
- ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
- unsigned NumOps = MCID.getNumOperands();
- MachineOperand JTOP =
- MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
- unsigned JTI = JTOP.getIndex();
- const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- assert(MJTI != nullptr);
- const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- assert(JTI < JT.size());
- // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
- // 4 aligned. The assembler / linker may add 2 byte padding just before
- // the JT entries. The size does not include this padding; the
- // constant islands pass does separate bookkeeping for it.
- // FIXME: If we know the size of the function is less than (1 << 16) *2
- // bytes, we can use 16-bit entries instead. Then there won't be an
- // alignment issue.
- unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
- unsigned NumEntries = JT[JTI].MBBs.size();
- if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
- // Make sure the instruction that follows TBB is 2-byte aligned.
- // FIXME: Constant island pass should insert an "ALIGN" instruction
- // instead.
- ++NumEntries;
- return NumEntries * EntrySize + InstSize;
- }
case ARM::SPACE:
return MI->getOperand(1).getImm();
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 6fa5ad7..f4ec8c6 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -180,9 +180,7 @@ namespace {
MachineInstr *MI;
MachineInstr *CPEMI;
MachineBasicBlock *HighWaterMark;
- private:
unsigned MaxDisp;
- public:
bool NegOk;
bool IsSoImm;
bool KnownAlignment;
@@ -216,12 +214,24 @@ namespace {
/// CPEntries - Keep track of all of the constant pool entry machine
- /// instructions. For each original constpool index (i.e. those that
- /// existed upon entry to this pass), it keeps a vector of entries.
- /// Original elements are cloned as we go along; the clones are
- /// put in the vector of the original element, but have distinct CPIs.
+ /// instructions. For each original constpool index (i.e. those that existed
+ /// upon entry to this pass), it keeps a vector of entries. Original
+ /// elements are cloned as we go along; the clones are put in the vector of
+ /// the original element, but have distinct CPIs.
+ ///
+ /// The first half of CPEntries contains generic constants, the second half
+ /// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up
+ /// which vector it will be in here.
std::vector<std::vector<CPEntry> > CPEntries;
+ /// Maps a JT index to the offset in CPEntries containing copies of that
+ /// table. The equivalent map for a CONSTPOOL_ENTRY is the identity.
+ DenseMap<int, int> JumpTableEntryIndices;
+ /// Maps a JT index to the LEA that actually uses the index to calculate its
+ /// base address.
+ DenseMap<int, int> JumpTableUserIndices;
/// ImmBranch - One per immediate branch, keeping the machine instruction
/// pointer, conditional or unconditional, the max displacement,
/// and (if isCond is true) the corresponding unconditional branch
@@ -269,7 +279,8 @@ namespace {
- void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
+ void doInitialConstPlacement(std::vector<MachineInstr *> &CPEMIs);
+ void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs);
bool BBHasFallthrough(MachineBasicBlock *MBB);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
unsigned getCPELogAlign(const MachineInstr *CPEMI);
@@ -279,6 +290,7 @@ namespace {
void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
void adjustBBOffsetsAfter(MachineBasicBlock *BB);
bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
+ unsigned getCombinedIndex(const MachineInstr *CPEMI);
int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
bool findAvailableWater(CPUser&U, unsigned UserOffset,
water_iterator &WaterIter);
@@ -301,8 +313,9 @@ namespace {
bool optimizeThumb2Instructions();
bool optimizeThumb2Branches();
bool reorderThumb2JumpTables();
- unsigned removeDeadDefinitions(MachineInstr *MI, unsigned BaseReg,
- unsigned IdxReg);
+ bool preserveBaseRegister(MachineInstr *JumpMI, MachineInstr *LEAMI,
+ unsigned &DeadSize, bool &CanDeleteLEA,
+ bool &BaseRegKill);
bool optimizeThumb2JumpTables();
MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
@@ -413,7 +426,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
if (!MCP->isEmpty())
- doInitialPlacement(CPEMIs);
+ doInitialConstPlacement(CPEMIs);
+ if (MF->getJumpTableInfo())
+ doInitialJumpTablePlacement(CPEMIs);
/// The next UID to take is the first unused one.
@@ -478,7 +494,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
const CPEntry & CPE = CPEntries[i][j];
- AFI->recordCPEClone(i, CPE.CPI);
+ if (CPE.CPEMI && CPE.CPEMI->getOperand(1).isCPI())
+ AFI->recordCPEClone(i, CPE.CPI);
@@ -488,6 +505,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
+ JumpTableEntryIndices.clear();
+ JumpTableUserIndices.clear();
@@ -495,10 +514,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
return MadeChange;
-/// doInitialPlacement - Perform the initial placement of the constant pool
-/// entries. To start with, we put them all at the end of the function.
+/// \brief Perform the initial placement of the regular constant pool entries.
+/// To start with, we put them all at the end of the function.
-ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
+ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) {
// Create the basic block to hold the CPE's.
MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
@@ -556,6 +575,66 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
+/// \brief Do initial placement of the jump tables. Because Thumb2's TBB and TBH
+/// instructions can be made more efficient if the jump table immediately
+/// follows the instruction, it's best to place them immediately next to their
+/// jumps to begin with. In almost all cases they'll never be moved from that
+/// position.
+void ARMConstantIslands::doInitialJumpTablePlacement(
+ std::vector<MachineInstr *> &CPEMIs) {
+ unsigned i = CPEntries.size();
+ auto MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
+ for (MachineBasicBlock &MBB : *MF) {
+ auto MI = MBB.getLastNonDebugInstr();
+ unsigned JTOpcode;
+ switch (MI->getOpcode()) {
+ default:
+ continue;
+ case ARM::BR_JTadd:
+ case ARM::BR_JTr:
+ case ARM::tBR_JTr:
+ case ARM::BR_JTm:
+ break;
+ case ARM::t2BR_JT:
+ break;
+ case ARM::t2TBB_JT:
+ break;
+ case ARM::t2TBH_JT:
+ break;
+ }
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ MachineOperand JTOp =
+ MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
+ unsigned JTI = JTOp.getIndex();
+ unsigned Size = JT[JTI].MBBs.size() * sizeof(uint32_t);
+ MachineBasicBlock *JumpTableBB = MF->CreateMachineBasicBlock();
+ MF->insert(std::next(MachineFunction::iterator(MBB)), JumpTableBB);
+ MachineInstr *CPEMI = BuildMI(*JumpTableBB, JumpTableBB->begin(),
+ DebugLoc(), TII->get(JTOpcode))
+ .addImm(i++)
+ .addJumpTableIndex(JTI)
+ .addImm(Size);
+ CPEMIs.push_back(CPEMI);
+ CPEntries.emplace_back(1, CPEntry(CPEMI, JTI));
+ JumpTableEntryIndices.insert(std::make_pair(JTI, CPEntries.size() - 1));
+ if (!LastCorrectlyNumberedBB)
+ LastCorrectlyNumberedBB = &MBB;
+ }
+ // If we did anything then we need to renumber the subsequent blocks.
+ if (LastCorrectlyNumberedBB)
+ MF->RenumberBlocks(LastCorrectlyNumberedBB);
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
/// into the block immediately after it.
bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
@@ -595,9 +674,21 @@ ARMConstantIslands::CPEntry
/// getCPELogAlign - Returns the required alignment of the constant pool entry
/// represented by CPEMI. Alignment is measured in log2(bytes) units.
unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
- assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
+ switch (CPEMI->getOpcode()) {
+ break;
+ return 0;
+ return 1;
+ return 2;
+ default:
+ llvm_unreachable("unknown constpool entry kind");
+ }
- unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned CPI = getCombinedIndex(CPEMI);
assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
unsigned Align = MCP->getConstants()[CPI].getAlignment();
assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
@@ -706,12 +797,14 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
// Scan the instructions for constant pool operands.
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
- if (I->getOperand(op).isCPI()) {
+ if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) {
// We found one. The addressing mode tells us the max displacement
// from the PC that this instruction permits.
@@ -727,6 +820,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// Taking the address of a CP entry.
case ARM::LEApcrel:
+ case ARM::LEApcrelJT:
// This takes a SoImm, which is 8 bit immediate rotated. We'll
// pretend the maximum offset is 255 * 4. Since each instruction
// 4 byte wide, this is always correct. We'll check for other
@@ -737,10 +831,12 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
IsSoImm = true;
case ARM::t2LEApcrel:
+ case ARM::t2LEApcrelJT:
Bits = 12;
NegOk = true;
case ARM::tLEApcrel:
+ case ARM::tLEApcrelJT:
Bits = 8;
Scale = 4;
@@ -768,6 +864,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// Remember that this is a user of a CP entry.
unsigned CPI = I->getOperand(op).getIndex();
+ if (I->getOperand(op).isJTI()) {
+ JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size()));
+ CPI = JumpTableEntryIndices[CPI];
+ }
MachineInstr *CPEMI = CPEMIs[CPI];
unsigned MaxOffs = ((1 << Bits)-1) * Scale;
CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
@@ -1101,6 +1202,13 @@ bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
return false;
+unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) {
+ if (CPEMI->getOperand(1).isCPI())
+ return CPEMI->getOperand(1).getIndex();
+ return JumpTableEntryIndices[CPEMI->getOperand(1).getIndex()];
/// LookForCPEntryInRange - see if the currently referenced CPE is in range;
/// if not, see if an in-range clone of the CPE is in range, and if so,
/// change the data structures so the user references the clone. Returns:
@@ -1120,7 +1228,7 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
// No. Look for previously created clones of the CPE that are in range.
- unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned CPI = getCombinedIndex(CPEMI);
std::vector<CPEntry> &CPEs = CPEntries[CPI];
for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
// We already tried this one
@@ -1365,7 +1473,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
- unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned CPI = getCombinedIndex(CPEMI);
unsigned Size = CPEMI->getOperand(2).getImm();
// Compute this only once, it's expensive.
unsigned UserOffset = getUserOffset(U);
@@ -1429,17 +1537,17 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// Update internal data structures to account for the newly inserted MBB.
- // Decrement the old entry, and remove it if refcount becomes 0.
- decrementCPEReferenceCount(CPI, CPEMI);
// Now that we have an island to add the CPE to, clone the original CPE and
// add it to the island.
U.HighWaterMark = NewIsland;
- U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
- .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc())
+ .addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size);
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ decrementCPEReferenceCount(CPI, CPEMI);
// Mark the basic block as aligned as required by the const-pool entry.
@@ -1844,77 +1952,120 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
return MadeChange;
-/// If we've formed a TBB or TBH instruction, the base register is now
-/// redundant. In most cases, the instructions defining it will now be dead and
-/// can be tidied up. This function removes them if so, and returns the number
-/// of bytes saved.
-unsigned ARMConstantIslands::removeDeadDefinitions(MachineInstr *MI,
- unsigned BaseReg,
- unsigned IdxReg) {
- unsigned BytesRemoved = 0;
- MachineBasicBlock *MBB = MI->getParent();
+static bool isSimpleIndexCalc(MachineInstr &I, unsigned EntryReg,
+ unsigned BaseReg) {
+ if (I.getOpcode() != ARM::t2ADDrs)
+ return false;
- // Scan backwards to find the instruction that defines the base
- // register. Due to post-RA scheduling, we can't count on it
- // immediately preceding the branch instruction.
- MachineBasicBlock::iterator PrevI = MI;
- MachineBasicBlock::iterator B = MBB->begin();
- while (PrevI != B && !PrevI->definesRegister(BaseReg))
- --PrevI;
- // If for some reason we didn't find it, we can't do anything, so
- // just skip this one.
- if (!PrevI->definesRegister(BaseReg) || PrevI->hasUnmodeledSideEffects() ||
- PrevI->mayStore())
- return BytesRemoved;
- MachineInstr *AddrMI = PrevI;
- unsigned NewBaseReg = BytesRemoved;
- // Examine the instruction that calculates the jumptable entry address. Make
- // sure it only defines the base register and kills any uses other than the
- // index register. We also need precisely one use to trace backwards to
- // (hopefully) the LEA.
- for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
- const MachineOperand &MO = AddrMI->getOperand(k);
- if (!MO.isReg() || !MO.getReg())
- continue;
- if (MO.isDef() && MO.getReg() != BaseReg)
- return BytesRemoved;
+ if (I.getOperand(0).getReg() != EntryReg)
+ return false;
- if (MO.isUse() && MO.getReg() != IdxReg) {
- if (!MO.isKill() || (NewBaseReg != 0 && NewBaseReg != MO.getReg()))
- return BytesRemoved;
- NewBaseReg = MO.getReg();
+ if (I.getOperand(1).getReg() != BaseReg)
+ return false;
+ // FIXME: what about CC and IdxReg?
+ return true;
+/// \brief While trying to form a TBB/TBH instruction, we may (if the table
+/// doesn't immediately follow the BR_JT) need access to the start of the
+/// jump-table. We know one instruction that produces such a register; this
+/// function works out whether that definition can be preserved to the BR_JT,
+/// possibly by removing an intervening addition (which is usually needed to
+/// calculate the actual entry to jump to).
+bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
+ MachineInstr *LEAMI,
+ unsigned &DeadSize,
+ bool &CanDeleteLEA,
+ bool &BaseRegKill) {
+ if (JumpMI->getParent() != LEAMI->getParent())
+ return false;
+ // Now we hope that we have at least these instructions in the basic block:
+ // BaseReg = t2LEA ...
+ // [...]
+ // EntryReg = t2ADDrs BaseReg, ...
+ // [...]
+ // t2BR_JT EntryReg
+ //
+ // We have to be very conservative about what we recognise here though. The
+ // main perturbing factors to watch out for are:
+ // + Spills at any point in the chain: not direct problems but we would
+ // expect a blocking Def of the spilled register so in practice what we
+ // can do is limited.
+ // + EntryReg == BaseReg: this is the one situation we should allow a Def
+ // of BaseReg, but only if the t2ADDrs can be removed.
+ // + Some instruction other than t2ADDrs computing the entry. Not seen in
+ // the wild, but we should be careful.
+ unsigned EntryReg = JumpMI->getOperand(0).getReg();
+ unsigned BaseReg = LEAMI->getOperand(0).getReg();
+ CanDeleteLEA = true;
+ BaseRegKill = false;
+ MachineInstr *RemovableAdd = nullptr;
+ MachineBasicBlock::iterator I(LEAMI);
+ for (++I; &*I != JumpMI; ++I) {
+ if (isSimpleIndexCalc(*I, EntryReg, BaseReg)) {
+ RemovableAdd = &*I;
+ break;
+ }
+ for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
+ const MachineOperand &MO = I->getOperand(K);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == BaseReg)
+ return false;
+ if (MO.isUse() && MO.getReg() == BaseReg) {
+ BaseRegKill = BaseRegKill || MO.isKill();
+ CanDeleteLEA = false;
+ }
+ }
+ }
+ if (!RemovableAdd)
+ return true;
+ // Check the add really is removable, and that nothing else in the block
+ // clobbers BaseReg.
+ for (++I; &*I != JumpMI; ++I) {
+ for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
+ const MachineOperand &MO = I->getOperand(K);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == BaseReg)
+ return false;
+ if (MO.isUse() && MO.getReg() == EntryReg)
+ RemovableAdd = nullptr;
- // Want to continue searching for AddrMI, but there are 2 problems: AddrMI is
- // going away soon, and even decrementing once may be invalid.
- if (PrevI != B)
- PrevI = std::prev(PrevI);
- DEBUG(dbgs() << "remove addr: " << *AddrMI);
- BytesRemoved += TII->GetInstSizeInBytes(AddrMI);
- AddrMI->eraseFromParent();
- // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
- // that gave us the initial base register definition.
- for (; PrevI != B && !PrevI->definesRegister(NewBaseReg); --PrevI)
- ;
- // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
- // to delete it as well.
- MachineInstr *LeaMI = PrevI;
- if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
- LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
- LeaMI->getOperand(0).getReg() != NewBaseReg)
- return BytesRemoved;
- DEBUG(dbgs() << "remove lea: " << *LeaMI);
- BytesRemoved += TII->GetInstSizeInBytes(LeaMI);
- LeaMI->eraseFromParent();
- return BytesRemoved;
+ if (RemovableAdd) {
+ RemovableAdd->eraseFromParent();
+ DeadSize += 4;
+ } else if (BaseReg == EntryReg) {
+ // The add wasn't removable, but clobbered the base for the TBB. So we can't
+ // preserve it.
+ return false;
+ }
+ // We reached the end of the block without seeing another definition of
+ // BaseReg (except, possibly the t2ADDrs, which was removed). BaseReg can be
+ // used in the TBB/TBH if necessary.
+ return true;
+/// \brief Returns whether CPEMI is the first instruction in the block
+/// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
+/// we can switch the first register to PC and usually remove the address
+/// calculation that preceded it.
+static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
+ MachineFunction::iterator MBB = JTMI->getParent();
+ MachineFunction *MF = MBB->getParent();
+ ++MBB;
+ return MBB != MF->end() && MBB->begin() != MBB->end() &&
+ &*MBB->begin() == CPEMI;
/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
@@ -1955,37 +2106,79 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
- if (ByteOk || HalfWordOk) {
- MachineBasicBlock *MBB = MI->getParent();
- unsigned BaseReg = MI->getOperand(0).getReg();
- bool BaseRegKill = MI->getOperand(0).isKill();
- if (!BaseRegKill)
- continue;
- unsigned IdxReg = MI->getOperand(1).getReg();
- bool IdxRegKill = MI->getOperand(1).isKill();
+ if (!ByteOk && !HalfWordOk)
+ continue;
- DEBUG(dbgs() << "Shrink JT: " << *MI);
- unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
- MachineBasicBlock::iterator MI_JT = MI;
- MachineInstr *NewJTMI =
- BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
- .addReg(IdxReg, getKillRegState(IdxRegKill))
- .addJumpTableIndex(JTI, JTOP.getTargetFlags());
- DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
- // FIXME: Insert an "ALIGN" instruction to ensure the next instruction
- // is 2-byte aligned. For now, asm printer will fix it up.
- unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
- unsigned OrigSize = TII->GetInstSizeInBytes(MI);
- unsigned DeadSize = removeDeadDefinitions(MI, BaseReg, IdxReg);
- MI->eraseFromParent();
+ MachineBasicBlock *MBB = MI->getParent();
+ if (!MI->getOperand(0).isKill()) // FIXME: needed now?
+ continue;
+ unsigned IdxReg = MI->getOperand(1).getReg();
+ bool IdxRegKill = MI->getOperand(1).isKill();
- int delta = OrigSize - NewSize + DeadSize;
- BBInfo[MBB->getNumber()].Size -= delta;
- adjustBBOffsetsAfter(MBB);
+ CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
+ unsigned DeadSize = 0;
+ bool CanDeleteLEA = false;
+ bool BaseRegKill = false;
+ bool PreservedBaseReg =
+ preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill);
- ++NumTBs;
- MadeChange = true;
+ if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
+ continue;
+ DEBUG(dbgs() << "Shrink JT: " << *MI);
+ MachineInstr *CPEMI = User.CPEMI;
+ unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
+ MachineBasicBlock::iterator MI_JT = MI;
+ MachineInstr *NewJTMI =
+ BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
+ .addReg(User.MI->getOperand(0).getReg(),
+ getKillRegState(BaseRegKill))
+ .addReg(IdxReg, getKillRegState(IdxRegKill))
+ .addJumpTableIndex(JTI, JTOP.getTargetFlags())
+ .addImm(CPEMI->getOperand(0).getImm());
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
+ CPEMI->setDesc(TII->get(JTOpc));
+ if (jumpTableFollowsTB(MI, User.CPEMI)) {
+ NewJTMI->getOperand(0).setReg(ARM::PC);
+ NewJTMI->getOperand(0).setIsKill(false);
+ if (CanDeleteLEA) {
+ User.MI->eraseFromParent();
+ DeadSize += 4;
+ // The LEA was eliminated, the TBB instruction becomes the only new user
+ // of the jump table.
+ User.MI = NewJTMI;
+ User.MaxDisp = 4;
+ User.NegOk = false;
+ User.IsSoImm = false;
+ User.KnownAlignment = false;
+ } else {
+ // The LEA couldn't be eliminated, so we must add another CPUser to
+ // record the TBB or TBH use.
+ int CPEntryIdx = JumpTableEntryIndices[JTI];
+ auto &CPEs = CPEntries[CPEntryIdx];
+ auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) {
+ return E.CPEMI == User.CPEMI;
+ });
+ ++Entry->RefCount;
+ CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false));
+ }
+ unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
+ unsigned OrigSize = TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
+ int Delta = OrigSize - NewSize + DeadSize;
+ BBInfo[MBB->getNumber()].Size -= Delta;
+ adjustBBOffsetsAfter(MBB);
+ ++NumTBs;
+ MadeChange = true;
return MadeChange;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 4405625e..50afb19 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -15,6 +15,7 @@
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -251,6 +252,9 @@ private:
// Select special operations if node forms integer ABS pattern
SDNode *SelectABSOp(SDNode *N);
+ SDNode *SelectReadRegister(SDNode *N);
+ SDNode *SelectWriteRegister(SDNode *N);
SDNode *SelectInlineAsm(SDNode *N);
SDNode *SelectConcatVector(SDNode *N);
@@ -2457,6 +2461,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
+ SDNode *ResNode = SelectWriteRegister(N);
+ if (ResNode)
+ return ResNode;
+ break;
+ }
+ SDNode *ResNode = SelectReadRegister(N);
+ if (ResNode)
+ return ResNode;
+ break;
+ }
SDNode *ResNode = SelectInlineAsm(N);
if (ResNode)
@@ -3336,6 +3352,418 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
+// Inspect a register string of the form
+// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
+// cp<coprocessor>:<opc1>:c<CRm> (64bit). Split the string into its fields
+// and obtain the integer operands from them, adding these operands to the
+// provided vector.
+static void getIntOperandsFromRegisterString(StringRef RegString,
+ SelectionDAG *CurDAG, SDLoc DL,
+ std::vector<SDValue>& Ops) {
+ SmallVector<StringRef, 5> Fields;
+ RegString.split(Fields, ":");
+ if (Fields.size() > 1) {
+ bool AllIntFields = true;
+ for (StringRef Field : Fields) {
+ // Need to trim out leading 'cp' characters and get the integer field.
+ unsigned IntField;
+ AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
+ Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
+ }
+ assert(AllIntFields &&
+ "Unexpected non-integer value in special register string.");
+ }
+// Maps a Banked Register string to its mask value. The mask value returned is
+// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
+// mask operand, which expresses which register is to be used, e.g. r8, and in
+// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
+// was invalid.
+static inline int getBankedRegisterMask(StringRef RegString) {
+ return StringSwitch<int>(RegString.lower())
+ .Case("r8_usr", 0x00)
+ .Case("r9_usr", 0x01)
+ .Case("r10_usr", 0x02)
+ .Case("r11_usr", 0x03)
+ .Case("r12_usr", 0x04)
+ .Case("sp_usr", 0x05)
+ .Case("lr_usr", 0x06)
+ .Case("r8_fiq", 0x08)
+ .Case("r9_fiq", 0x09)
+ .Case("r10_fiq", 0x0a)
+ .Case("r11_fiq", 0x0b)
+ .Case("r12_fiq", 0x0c)
+ .Case("sp_fiq", 0x0d)
+ .Case("lr_fiq", 0x0e)
+ .Case("lr_irq", 0x10)
+ .Case("sp_irq", 0x11)
+ .Case("lr_svc", 0x12)
+ .Case("sp_svc", 0x13)
+ .Case("lr_abt", 0x14)
+ .Case("sp_abt", 0x15)
+ .Case("lr_und", 0x16)
+ .Case("sp_und", 0x17)
+ .Case("lr_mon", 0x1c)
+ .Case("sp_mon", 0x1d)
+ .Case("elr_hyp", 0x1e)
+ .Case("sp_hyp", 0x1f)
+ .Case("spsr_fiq", 0x2e)
+ .Case("spsr_irq", 0x30)
+ .Case("spsr_svc", 0x32)
+ .Case("spsr_abt", 0x34)
+ .Case("spsr_und", 0x36)
+ .Case("spsr_mon", 0x3c)
+ .Case("spsr_hyp", 0x3e)
+ .Default(-1);
+// Maps a MClass special register string to its value for use in the
+// t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
+// Returns -1 to signify that the string was invalid.
+static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
+ return StringSwitch<int>(RegString.lower())
+ .Case("apsr", 0x0)
+ .Case("iapsr", 0x1)
+ .Case("eapsr", 0x2)
+ .Case("xpsr", 0x3)
+ .Case("ipsr", 0x5)
+ .Case("epsr", 0x6)
+ .Case("iepsr", 0x7)
+ .Case("msp", 0x8)
+ .Case("psp", 0x9)
+ .Case("primask", 0x10)
+ .Case("basepri", 0x11)
+ .Case("basepri_max", 0x12)
+ .Case("faultmask", 0x13)
+ .Case("control", 0x14)
+ .Default(-1);
+// The flags here are common to those allowed for apsr in the A class cores and
+// those allowed for the special registers in the M class cores. Returns a
+// value representing which flags were present, -1 if invalid.
+static inline int getMClassFlagsMask(StringRef Flags) {
+ if (Flags.empty())
+ return 0x3;
+ return StringSwitch<int>(Flags)
+ .Case("g", 0x1)
+ .Case("nzcvq", 0x2)
+ .Case("nzcvqg", 0x3)
+ .Default(-1);
+static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
+ const ARMSubtarget *Subtarget) {
+ // Ensure that the register (without flags) was a valid M Class special
+ // register.
+ int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
+ if (SYSmvalue == -1)
+ return -1;
+ // basepri, basepri_max and faultmask are only valid for V7m.
+ if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
+ return -1;
+ // If it was a read then we won't be expecting flags and so at this point
+ // we can return the mask.
+ if (IsRead) {
+ assert (Flags.empty() && "Unexpected flags for reading M class register.");
+ return SYSmvalue;
+ }
+ // We know we are now handling a write so need to get the mask for the flags.
+ int Mask = getMClassFlagsMask(Flags);
+ // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
+ // shouldn't have flags present.
+ if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
+ return -1;
+ // The _g and _nzcvqg versions are only valid if the DSP extension is
+ // available. NOTE(review): 0x2 is the nzcvq bit; g is 0x1 -- confirm.
+ if (!Subtarget->hasThumb2DSP() && (Mask & 0x2))
+ return -1;
+ // The register was valid so need to put the mask in the correct place
+ // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
+ // construct the operand for the instruction node.
+ if (SYSmvalue < 0x4)
+ return SYSmvalue | Mask << 10;
+ return SYSmvalue;
+static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
+ // The mask operand contains the special register (R Bit) in bit 4, whether
+ // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
+ // bits 3-0 contains the fields to be accessed in the special register, set by
+ // the flags provided with the register.
+ int Mask = 0;
+ if (Reg == "apsr") {
+ // The flags permitted for apsr are the same flags that are allowed in
+ // M class registers. We get the flag value and then shift the flags into
+ // the correct place to combine with the mask.
+ Mask = getMClassFlagsMask(Flags);
+ if (Mask == -1)
+ return -1;
+ return Mask << 2;
+ }
+ if (Reg != "cpsr" && Reg != "spsr") {
+ return -1;
+ }
+ // This is the same as if the flags were "fc"
+ if (Flags.empty() || Flags == "all")
+ return Mask | 0x9;
+ // Inspect the supplied flags string and set the bits in the mask for
+ // the relevant and valid flags allowed for cpsr and spsr.
+ for (char Flag : Flags) {
+ int FlagVal;
+ switch (Flag) {
+ case 'c':
+ FlagVal = 0x1;
+ break;
+ case 'x':
+ FlagVal = 0x2;
+ break;
+ case 's':
+ FlagVal = 0x4;
+ break;
+ case 'f':
+ FlagVal = 0x8;
+ break;
+ default:
+ FlagVal = 0;
+ }
+ // This avoids allowing strings where the same flag bit appears twice.
+ if (!FlagVal || (Mask & FlagVal))
+ return -1;
+ Mask |= FlagVal;
+ }
+ // If the register is spsr then we need to set the R bit.
+ if (Reg == "spsr")
+ Mask |= 0x10;
+ return Mask;
+// Lower the read_register intrinsic to ARM specific DAG nodes
+// using the supplied metadata string to select the instruction node to use
+// and the registers/masks to construct as operands for the node.
+SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){
+ const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
+ const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ bool IsThumb2 = Subtarget->isThumb2();
+ SDLoc DL(N);
+ std::vector<SDValue> Ops;
+ getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
+ if (!Ops.empty()) {
+ // If the special register string was constructed of fields (as defined
+ // in the ACLE) then need to lower to MRC node (32 bit) or
+ // MRRC node(64 bit), we can make the distinction based on the number of
+ // operands we have.
+ unsigned Opcode;
+ SmallVector<EVT, 3> ResTypes;
+ if (Ops.size() == 5){
+ Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
+ ResTypes.append({ MVT::i32, MVT::Other });
+ } else {
+ assert(Ops.size() == 3 &&
+ "Invalid number of fields in special register string.");
+ Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
+ ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
+ }
+ Ops.push_back(getAL(CurDAG, DL));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(N->getOperand(0));
+ return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops);
+ }
+ std::string SpecialReg = RegString->getString().lower();
+ int BankedReg = getBankedRegisterMask(SpecialReg);
+ if (BankedReg != -1) {
+ Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+ N->getOperand(0) };
+ return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
+ DL, MVT::i32, MVT::Other, Ops);
+ }
+ // The VFP registers are read by creating SelectionDAG nodes with opcodes
+ // corresponding to the register that is being read from. So we switch on the
+ // string to find which opcode we need to use.
+ unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
+ .Case("fpscr", ARM::VMRS)
+ .Case("fpexc", ARM::VMRS_FPEXC)
+ .Case("fpsid", ARM::VMRS_FPSID)
+ .Case("mvfr0", ARM::VMRS_MVFR0)
+ .Case("mvfr1", ARM::VMRS_MVFR1)
+ .Case("mvfr2", ARM::VMRS_MVFR2)
+ .Case("fpinst", ARM::VMRS_FPINST)
+ .Case("fpinst2", ARM::VMRS_FPINST2)
+ .Default(0);
+ // If an opcode was found then we can lower the read to a VFP instruction.
+ if (Opcode) {
+ if (!Subtarget->hasVFP2())
+ return nullptr;
+ if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
+ return nullptr;
+ Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+ N->getOperand(0) };
+ return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops);
+ }
+ // If the target is M Class then need to validate that the register string
+ // is an acceptable value, so check that a mask can be constructed from the
+ // string.
+ if (Subtarget->isMClass()) {
+ int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget);
+ if (SYSmValue == -1)
+ return nullptr;
+ SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+ N->getOperand(0) };
+ return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops);
+ }
+ // Here we know the target is not M Class so we need to check if it is one
+ // of the remaining possible values which are apsr, cpsr or spsr.
+ if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
+ Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+ N->getOperand(0) };
+ return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL,
+ MVT::i32, MVT::Other, Ops);
+ }
+ if (SpecialReg == "spsr") {
+ Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+ N->getOperand(0) };
+ return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
+ DL, MVT::i32, MVT::Other, Ops);
+ }
+ return nullptr;
+// Lower the write_register intrinsic to ARM specific DAG nodes
+// using the supplied metadata string to select the instruction node to use
+// and the registers/masks to use in the nodes
+SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
+ const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
+ const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ bool IsThumb2 = Subtarget->isThumb2();
+ SDLoc DL(N);
+ std::vector<SDValue> Ops;
+ getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
+ if (!Ops.empty()) {
+ // If the special register string was constructed of fields (as defined
+ // in the ACLE) then need to lower to MCR node (32 bit) or
+ // MCRR node(64 bit), we can make the distinction based on the number of
+ // operands we have.
+ unsigned Opcode;
+ if (Ops.size() == 5) {
+ Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
+ Ops.insert(Ops.begin()+2, N->getOperand(2));
+ } else {
+ assert(Ops.size() == 3 &&
+ "Invalid number of fields in special register string.");
+ Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
+ SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
+ Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
+ }
+ Ops.push_back(getAL(CurDAG, DL));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(N->getOperand(0));
+ return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+ }
+ std::string SpecialReg = RegString->getString().lower();
+ int BankedReg = getBankedRegisterMask(SpecialReg);
+ if (BankedReg != -1) {
+ Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+ N->getOperand(0) };
+ return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
+ DL, MVT::Other, Ops);
+ }
+ // The VFP registers are written to by creating SelectionDAG nodes with
+ // opcodes corresponding to the register that is being written. So we switch
+ // on the string to find which opcode we need to use.
+ unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
+ .Case("fpscr", ARM::VMSR)
+ .Case("fpexc", ARM::VMSR_FPEXC)
+ .Case("fpsid", ARM::VMSR_FPSID)
+ .Case("fpinst", ARM::VMSR_FPINST)
+ .Case("fpinst2", ARM::VMSR_FPINST2)
+ .Default(0);
+ if (Opcode) {
+ if (!Subtarget->hasVFP2())
+ return nullptr;
+ Ops = { N->getOperand(2), getAL(CurDAG, DL),
+ CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
+ return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+ }
+ SmallVector<StringRef, 5> Fields;
+ StringRef(SpecialReg).split(Fields, "_", 1, false);
+ std::string Reg = Fields[0].str();
+ StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
+ // If the target was M Class then need to validate the special register value
+ // and retrieve the mask for use in the instruction node.
+ if (Subtarget->isMClass()) {
+ // basepri_max gets split so need to correct Reg and Flags.
+ if (SpecialReg == "basepri_max") {
+ Reg = SpecialReg;
+ Flags = "";
+ }
+ int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
+ if (SYSmValue == -1)
+ return nullptr;
+ SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
+ N->getOperand(2), getAL(CurDAG, DL),
+ CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
+ return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops);
+ }
+ // We then check to see if a valid mask can be constructed for one of the
+ // register string values permitted for the A and R class cores. These values
+ // are apsr, spsr and cpsr; these are also valid on older cores.
+ int Mask = getARClassRegisterMask(Reg, Flags);
+ if (Mask != -1) {
+ Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+ N->getOperand(0) };
+ return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
+ DL, MVT::Other, Ops);
+ }
+ return nullptr;
SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
std::vector<SDValue> AsmNodeOperands;
unsigned Flag, Kind;
@@ -3492,13 +3920,29 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
- assert(ConstraintID == InlineAsm::Constraint_m &&
- "unexpected asm memory constraint");
- // Require the address to be in a register. That is safe for all ARM
- // variants and it is hard to do anything much smarter without knowing
- // how the operand is used.
- OutOps.push_back(Op);
- return false;
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ // FIXME: It seems strange that 'i' is needed here since it's supposed to
+ // be an immediate and not a memory constraint.
+ // Fallthrough.
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_Q:
+ case InlineAsm::Constraint_Um:
+ case InlineAsm::Constraint_Un:
+ case InlineAsm::Constraint_Uq:
+ case InlineAsm::Constraint_Us:
+ case InlineAsm::Constraint_Ut:
+ case InlineAsm::Constraint_Uv:
+ case InlineAsm::Constraint_Uy:
+ // Require the address to be in a register. That is safe for all ARM
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
+ return false;
+ }
+ return true;
/// createARMISelDag - This pass converts a legalized DAG into a
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 629cc90..47c8400 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -426,6 +426,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
+ setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
+ setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
if (Subtarget->hasNEON()) {
@@ -2378,6 +2381,24 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
return !Subtarget->isThumb1Only();
+// Writing a 64 bit value, so we need to split it into two 32 bit values
+// first, and pass the low and high parts through.
+static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ SDValue WriteValue = Op->getOperand(2);
+ // This function is only supposed to be called for i64 type argument.
+ assert(WriteValue.getValueType() == MVT::i64
+ && "LowerWRITE_REGISTER called for non-i64 type argument.");
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
+ DAG.getConstant(0, DL, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
+ DAG.getConstant(1, DL, MVT::i32));
+ SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
+ return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
@@ -4085,7 +4106,28 @@ unsigned ARMTargetLowering::getRegisterByName(const char* RegName,
if (Reg)
return Reg;
- report_fatal_error("Invalid register name global variable");
+ report_fatal_error(Twine("Invalid register name \""
+ + StringRef(RegName) + "\"."));
+// Result is 64 bit value so split into two 32 bit values and return as a
+// pair of values.
+static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ SDLoc DL(N);
+ // This function is only supposed to be called for i64 type destination.
+ assert(N->getValueType(0) == MVT::i64
+ && "ExpandREAD_REGISTER called for non-i64 type result.");
+ SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
+ DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
+ N->getOperand(0),
+ N->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
+ Read.getValue(1)));
+ Results.push_back(Read.getOperand(0));
/// ExpandBITCAST - If the target supports VFP, this function is called to
@@ -6355,6 +6397,7 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress:
@@ -6439,6 +6482,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
llvm_unreachable("Don't know how to custom expand this!");
+ ExpandREAD_REGISTER(N, Results, DAG);
+ break;
Res = ExpandBITCAST(N, DAG);
@@ -10222,7 +10268,8 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
EVT VT = getValueType(Ty, true);
if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
return false;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 63e87c5..c0b329c 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -286,7 +286,8 @@ namespace llvm {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
@@ -346,8 +347,31 @@ namespace llvm {
unsigned getInlineAsmMemConstraint(
const std::string &ConstraintCode) const override {
- // FIXME: Map different constraints differently.
- return InlineAsm::Constraint_m;
+ if (ConstraintCode == "Q")
+ return InlineAsm::Constraint_Q;
+ else if (ConstraintCode.size() == 2) {
+ if (ConstraintCode[0] == 'U') {
+ switch(ConstraintCode[1]) {
+ default:
+ break;
+ case 'm':
+ return InlineAsm::Constraint_Um;
+ case 'n':
+ return InlineAsm::Constraint_Un;
+ case 'q':
+ return InlineAsm::Constraint_Uq;
+ case 's':
+ return InlineAsm::Constraint_Us;
+ case 't':
+ return InlineAsm::Constraint_Ut;
+ case 'v':
+ return InlineAsm::Constraint_Uv;
+ case 'y':
+ return InlineAsm::Constraint_Uy;
+ }
+ }
+ }
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
const ARMSubtarget* getSubtarget() const {
diff --git a/lib/Target/ARM/ b/lib/Target/ARM/
index 778fd17..b8cac13 100644
--- a/lib/Target/ARM/
+++ b/lib/Target/ARM/
@@ -1826,6 +1826,32 @@ def CONSTPOOL_ENTRY :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size), NoItinerary, []>;
+/// A jumptable consisting of direct 32-bit addresses of the destination basic
+/// blocks (either absolute, or relative to the start of the jump-table in PIC
+/// mode). Used mostly in ARM and Thumb-1 modes.
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), NoItinerary, []>;
+/// A jumptable consisting of 32-bit jump instructions. Used for Thumb-2 tables
+/// that cannot be optimised to use TBB or TBH.
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), NoItinerary, []>;
+/// A jumptable consisting of 8-bit unsigned integers representing offsets from
+/// a TBB instruction.
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), NoItinerary, []>;
+/// A jumptable consisting of 16-bit unsigned integers representing offsets from
+/// a TBH instruction.
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), NoItinerary, []>;
// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
// from removing one half of the matched pairs. That breaks PEI, which assumes
// these will always be in pairs, and asserts if it finds otherwise. Better way?
@@ -2224,7 +2250,7 @@ let isBranch = 1, isTerminator = 1 in {
[(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>,
- let isNotDuplicable = 1, isIndirectBranch = 1 in {
+ let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
def BR_JTr : ARMPseudoInst<(outs),
(ins GPR:$target, i32imm:$jt),
0, IIC_Br,
@@ -5039,10 +5065,11 @@ def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
imm:$CRm, imm:$opc2),
(MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
-class MovRRCopro<string opc, bit direction, list<dag> pattern = []>
- : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
- GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm),
- NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
+class MovRRCopro<string opc, bit direction, dag oops, dag iops, list<dag>
+ pattern = []>
+ : ABI<0b1100, oops, iops, NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm",
+ pattern> {
let Inst{23-21} = 0b010;
let Inst{20} = direction;
@@ -5060,9 +5087,13 @@ class MovRRCopro<string opc, bit direction, list<dag> pattern = []>
def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
+ (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt,
+ GPRnopc:$Rt2, c_imm:$CRm),
[(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt,
GPRnopc:$Rt2, imm:$CRm)]>;
-def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
+def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */,
+ (outs GPRnopc:$Rt, GPRnopc:$Rt2),
+ (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>;
class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
: ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
diff --git a/lib/Target/ARM/ b/lib/Target/ARM/
index 0fecfa1..40414da 100644
--- a/lib/Target/ARM/
+++ b/lib/Target/ARM/
@@ -526,6 +526,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
0, IIC_Br,
[(ARMbrjt tGPR:$target, tjumptable:$jt)]>,
Sched<[WriteBrTbl]> {
+ let Size = 2;
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
diff --git a/lib/Target/ARM/ b/lib/Target/ARM/
index 814b524..aba8a7b 100644
--- a/lib/Target/ARM/
+++ b/lib/Target/ARM/
@@ -3531,20 +3531,20 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
let AsmMatchConverter = "cvtThumbBranches";
-let isNotDuplicable = 1, isIndirectBranch = 1 in {
+let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT : t2PseudoInst<(outs),
(ins GPR:$target, GPR:$index, i32imm:$jt),
0, IIC_Br,
[(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt)]>,
-// FIXME: Add a non-pc based case that can be predicated.
+// FIXME: Add a case that can be predicated.
def t2TBB_JT : t2PseudoInst<(outs),
- (ins GPR:$index, i32imm:$jt), 0, IIC_Br, []>,
+ (ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
def t2TBH_JT : t2PseudoInst<(outs),
- (ins GPR:$index, i32imm:$jt), 0, IIC_Br, []>,
+ (ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
@@ -4141,11 +4141,9 @@ class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
let Inst{19-16} = CRn;
-class t2MovRRCopro<bits<4> Op, string opc, bit direction,
+class t2MovRRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
list<dag> pattern = []>
- : T2Cop<Op, (outs),
- (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
- opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
+ : T2Cop<Op, oops, iops, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
let Inst{27-24} = 0b1100;
let Inst{23-21} = 0b010;
let Inst{20} = direction;
@@ -4210,19 +4208,25 @@ def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
/* from ARM core register to coprocessor */
-def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0,
+def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, (outs),
+ (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
+ c_imm:$CRm),
[(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
-def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0,
- [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
- GPR:$Rt2, imm:$CRm)]> {
+def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, (outs),
+ (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
+ c_imm:$CRm),
+ [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
+ GPR:$Rt2, imm:$CRm)]> {
let Predicates = [IsThumb2, PreV8];
/* from coprocessor to ARM core register */
-def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>;
+def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1, (outs GPR:$Rt, GPR:$Rt2),
+ (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm)>;
-def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1> {
+def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1, (outs GPR:$Rt, GPR:$Rt2),
+ (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm)> {
let Predicates = [IsThumb2, PreV8];
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 5b62a21..46ff326 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -7,8 +7,8 @@
-// This file contains a pass that performs load / store related peephole
-// optimizations. This pass should be run after register allocation.
+/// \file This file contains a pass that performs load / store related peephole
+/// optimizations. This pass should be run after register allocation.
@@ -58,10 +58,9 @@ STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
-/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
-/// load / store instructions to form ldm / stm instructions.
namespace {
+ /// Post- register allocation pass the combine load / store instructions to
+ /// form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
static char ID;
ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
@@ -271,10 +270,7 @@ static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
-namespace llvm {
- namespace ARM_AM {
-AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
+static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
switch (Opcode) {
default: llvm_unreachable("Unhandled opcode!");
@@ -328,9 +324,6 @@ AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
- } // end namespace ARM_AM
-} // end namespace llvm
static bool isT1i32Load(unsigned Opc) {
return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
@@ -469,9 +462,9 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
-/// MergeOps - Create and insert a LDM or STM with Base as base register and
-/// registers in Regs as the register operands that would be loaded / stored.
-/// It returns true if the transformation is done.
+/// Create and insert a LDM or STM with Base as base register and registers in
+/// Regs as the register operands that would be loaded / stored. It returns
+/// true if the transformation is done.
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -665,7 +658,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return true;
-/// \brief Find all instructions using a given imp-def within a range.
+/// Find all instructions using a given imp-def within a range.
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
@@ -721,8 +714,7 @@ void ARMLoadStoreOpt::findUsesOfImpDef(
-// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
-// success.
+/// Call MergeOps and update MemOps and merges accordingly on success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
MemOpQueue &memOps,
unsigned memOpsBegin, unsigned memOpsEnd,
@@ -762,10 +754,10 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
Regs.push_back(std::make_pair(Reg, isKill));
// Collect any implicit defs of super-registers. They must be preserved.
- for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
- if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
+ for (const MachineOperand &MO : memOps[i].MBBI->operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead())
- unsigned DefReg = MO->getReg();
+ unsigned DefReg = MO.getReg();
if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
@@ -823,8 +815,8 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
-/// MergeLDR_STR - Merge a number of load / store instructions into one or more
-/// load / store multiple instructions.
+/// Merge a number of load / store instructions into one or more load / store
+/// multiple instructions.
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
unsigned Base, unsigned Opcode, unsigned Size,
@@ -1083,8 +1075,8 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
-/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
-/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
+/// Fold proceeding/trailing inc/dec of base register into the
+/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
@@ -1118,7 +1110,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
return false;
bool DoMerge = false;
- ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
+ ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
// Try merging with the previous instruction.
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
@@ -1231,8 +1223,8 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
-/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
-/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
+/// Fold proceeding/trailing inc/dec of base register into the
+/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const TargetInstrInfo *TII,
@@ -1373,8 +1365,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
return true;
-/// isMemoryOp - Returns true if instruction is a memory operation that this
-/// pass is capable of operating on.
+/// Returns true if instruction is a memory operation that this pass is capable
+/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
// When no memory operands are present, conservatively assume unaligned,
// volatile, unfoldable.
@@ -1428,8 +1420,8 @@ static bool isMemoryOp(const MachineInstr *MI) {
return false;
-/// AdvanceRS - Advance register scavenger to just before the earliest memory
-/// op that is being merged.
+/// Advance register scavenger to just before the earliest memory op that is
+/// being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
unsigned Position = MemOps[0].Position;
@@ -1472,8 +1464,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) {
MachineInstr *MI = &*MBBI;
unsigned Opcode = MI->getOpcode();
- if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
- Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
+ if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
const MachineOperand &BaseOp = MI->getOperand(2);
unsigned BaseReg = BaseOp.getReg();
unsigned EvenReg = MI->getOperand(0).getReg();
@@ -1588,8 +1579,8 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
return false;
-/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
-/// ops of the same base and incrementing offset into LDM / STM ops.
+/// An optimization pass to turn multiple LDR / STR ops of the same base and
+/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
unsigned NumMerges = 0;
unsigned NumMemOps = 0;
@@ -1770,9 +1761,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
return NumMerges > 0;
-/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
-/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
-/// directly restore the value of LR into pc.
+/// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
+/// into the preceding stack restore so it directly restore the value of LR
+/// into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
@@ -1834,12 +1825,9 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
return Modified;
-/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
-/// load / stores from consecutive locations close to make it more
-/// likely they will be combined later.
namespace {
+ /// Pre- register allocation pass that move load / stores from consecutive
+ /// locations close to make it more likely they will be combined later.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
static char ID;
ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
@@ -1936,7 +1924,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
-/// Copy Op0 and Op1 operands into a new array assigned to MI.
+/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
MachineInstr *Op1) {
assert(MI->memoperands_empty() && "expected a new machineinstr");
@@ -1954,10 +1942,11 @@ static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
- DebugLoc &dl,
- unsigned &NewOpc, unsigned &EvenReg,
- unsigned &OddReg, unsigned &BaseReg,
- int &Offset, unsigned &PredReg,
+ DebugLoc &dl, unsigned &NewOpc,
+ unsigned &FirstReg,
+ unsigned &SecondReg,
+ unsigned &BaseReg, int &Offset,
+ unsigned &PredReg,
ARMCC::CondCodes &Pred,
bool &isT2) {
// Make sure we're allowed to generate LDRD/STRD.
@@ -2016,9 +2005,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
return false;
Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
- EvenReg = Op0->getOperand(0).getReg();
- OddReg = Op1->getOperand(0).getReg();
- if (EvenReg == OddReg)
+ FirstReg = Op0->getOperand(0).getReg();
+ SecondReg = Op1->getOperand(0).getReg();
+ if (FirstReg == SecondReg)
return false;
BaseReg = Op0->getOperand(1).getReg();
Pred = getInstrPredicate(Op0, PredReg);
@@ -2114,7 +2103,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
// to try to allocate a pair of registers that can form register pairs.
MachineInstr *Op0 = Ops.back();
MachineInstr *Op1 = Ops[Ops.size()-2];
- unsigned EvenReg = 0, OddReg = 0;
+ unsigned FirstReg = 0, SecondReg = 0;
unsigned BaseReg = 0, PredReg = 0;
ARMCC::CondCodes Pred = ARMCC::AL;
bool isT2 = false;
@@ -2122,21 +2111,21 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
int Offset = 0;
DebugLoc dl;
if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
- EvenReg, OddReg, BaseReg,
+ FirstReg, SecondReg, BaseReg,
Offset, PredReg, Pred, isT2)) {
const MCInstrDesc &MCID = TII->get(NewOpc);
const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
- MRI->constrainRegClass(EvenReg, TRC);
- MRI->constrainRegClass(OddReg, TRC);
+ MRI->constrainRegClass(FirstReg, TRC);
+ MRI->constrainRegClass(SecondReg, TRC);
// Form the pair instruction.
if (isLd) {
MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
- .addReg(EvenReg, RegState::Define)
- .addReg(OddReg, RegState::Define)
+ .addReg(FirstReg, RegState::Define)
+ .addReg(SecondReg, RegState::Define)
// FIXME: We're converting from LDRi12 to an insn that still
// uses addrmode2, so we need an explicit offset reg. It should
@@ -2149,8 +2138,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
} else {
MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
- .addReg(EvenReg)
- .addReg(OddReg)
+ .addReg(FirstReg)
+ .addReg(SecondReg)
// FIXME: We're converting from LDRi12 to an insn that still
// uses addrmode2, so we need an explicit offset reg. It should
@@ -2165,9 +2154,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
- // Add register allocation hints to form register pairs.
- MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
- MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
+ if (!isT2) {
+ // Add register allocation hints to form register pairs.
+ MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
+ MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
+ }
} else {
for (unsigned i = 0; i != NumMove; ++i) {
MachineInstr *Op = Ops.back();
@@ -2292,8 +2283,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
-/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
-/// optimization pass.
+/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
if (PreAlloc)
return new ARMPreAllocLoadStoreOpt();
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index e370b96..a2aca2d 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -30,35 +30,35 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
unsigned Option = MO.getTargetFlags() & ARMII::MO_OPTION_MASK;
switch (Option) {
default: {
- Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
switch (Option) {
default: llvm_unreachable("Unknown target flag on symbol operand");
case ARMII::MO_LO16:
- Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
- Expr = ARMMCExpr::CreateLower16(Expr, OutContext);
+ Expr = ARMMCExpr::createLower16(Expr, OutContext);
case ARMII::MO_HI16:
- Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
- Expr = ARMMCExpr::CreateUpper16(Expr, OutContext);
+ Expr = ARMMCExpr::createUpper16(Expr, OutContext);
- Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_PLT,
+ Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_PLT,
if (!MO.isJTI() && MO.getOffset())
- Expr = MCBinaryExpr::CreateAdd(Expr,
- MCConstantExpr::Create(MO.getOffset(),
+ Expr = MCBinaryExpr::createAdd(Expr,
+ MCConstantExpr::create(MO.getOffset(),
return MCOperand::createExpr(Expr);
@@ -80,7 +80,7 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = MCOperand::createImm(MO.getImm());
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
MO.getMBB()->getSymbol(), OutContext));
case MachineOperand::MO_GlobalAddress: {
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index e794fb7..0aceaed 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -304,10 +304,6 @@ public:
return getTM<ARMBaseTargetMachine>();
- const ARMSubtarget &getARMSubtarget() const {
- return *getARMTargetMachine().getSubtargetImpl();
- }
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
@@ -330,24 +326,28 @@ void ARMPassConfig::addIRPasses() {
// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in
// ldrex/strex loops to simplify this, but it needs tidying up.
- const ARMSubtarget *Subtarget = &getARMSubtarget();
- if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only())
- if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
- addPass(createCFGSimplificationPass());
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
+ addPass(createCFGSimplificationPass(-1, [this](const Function &F) {
+ const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
+ return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
+ }));
bool ARMPassConfig::addPreISel() {
- if ((TM->getOptLevel() == CodeGenOpt::Aggressive &&
+ if ((TM->getOptLevel() != CodeGenOpt::None &&
EnableGlobalMerge == cl::BOU_UNSET) ||
- EnableGlobalMerge == cl::BOU_TRUE)
+ EnableGlobalMerge == cl::BOU_TRUE) {
// FIXME: This is using the thumb1 only constant value for
// maximal global offset for merging globals. We may want
// to look into using the old value for non-thumb1 code of
// 4095 based on the TargetMachine, but this starts to become
// tricky when doing code gen per function.
- addPass(createGlobalMergePass(TM, 127));
+ bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
+ (EnableGlobalMerge == cl::BOU_UNSET);
+ addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize));
+ }
return false;
@@ -387,10 +387,13 @@ void ARMPassConfig::addPreSched2() {
if (getOptLevel() != CodeGenOpt::None) {
// in v8, IfConversion depends on Thumb instruction widths
- if (getARMSubtarget().restrictIT())
- addPass(createThumb2SizeReductionPass());
- if (!getARMSubtarget().isThumb1Only())
- addPass(&IfConverterID);
+ addPass(createThumb2SizeReductionPass([this](const Function &F) {
+ return this->TM->getSubtarget<ARMSubtarget>(F).restrictIT();
+ }));
+ addPass(createIfConverter([this](const Function &F) {
+ return !this->TM->getSubtarget<ARMSubtarget>(F).isThumb1Only();
+ }));
@@ -399,8 +402,9 @@ void ARMPassConfig::addPreEmitPass() {
// Constant island pass work on unbundled instructions.
- if (getARMSubtarget().isThumb2())
- addPass(&UnpackMachineBundlesID);
+ addPass(createUnpackMachineBundles([this](const Function &F) {
+ return this->TM->getSubtarget<ARMSubtarget>(F).isThumb2();
+ }));
// Don't optimize barriers at -O0.
if (getOptLevel() != CodeGenOpt::None)
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 80f03c6..eaed5cc 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -50,12 +50,12 @@ const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
- return MCSymbolRefExpr::Create(TM.getSymbol(GV, Mang),
+ return MCSymbolRefExpr::create(TM.getSymbol(GV, Mang),
MCSymbolRefExpr::VK_ARM_TARGET2, getContext());
const MCExpr *ARMElfTargetObjectFile::
getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
- return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_ARM_TLSLDO,
+ return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_ARM_TLSLDO,
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 30c7d62..8bcbb11 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1051,7 +1051,7 @@ public:
if (!CE) return false;
int64_t Value = CE->getValue();
return (ARM_AM::getSOImmVal(Value) != -1 ||
- ARM_AM::getSOImmVal(-Value) != -1);;
+ ARM_AM::getSOImmVal(-Value) != -1);
bool isT2SOImm() const {
if (!isImm()) return false;
@@ -4252,7 +4252,7 @@ ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
Error(S, "'be' or 'le' operand expected");
return MatchOperand_ParseFail;
- Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val,
+ Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::create(Val,
S, Tok.getEndLoc()));
return MatchOperand_Success;
@@ -4656,7 +4656,7 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
Val = INT32_MIN;
- ARMOperand::CreateImm(MCConstantExpr::Create(Val, getContext()), S, E));
+ ARMOperand::CreateImm(MCConstantExpr::create(Val, getContext()), S, E));
return MatchOperand_Success;
@@ -4886,7 +4886,7 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) {
// If the constant was #-0, represent it as INT32_MIN.
int32_t Val = CE->getValue();
if (isNegative && Val == 0)
- CE = MCConstantExpr::Create(INT32_MIN, getContext());
+ CE = MCConstantExpr::create(INT32_MIN, getContext());
// Now we should have the closing ']'
if (Parser.getTok().isNot(AsmToken::RBrac))
@@ -5073,7 +5073,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) {
IntVal ^= (uint64_t)isNegative << 31;
Parser.Lex(); // Eat the token.
- MCConstantExpr::Create(IntVal, getContext()),
+ MCConstantExpr::create(IntVal, getContext()),
S, Parser.getTok().getLoc()));
return MatchOperand_Success;
@@ -5090,7 +5090,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) {
Val = APFloat(RealVal).bitcastToAPInt().getZExtValue();
- MCConstantExpr::Create(Val, getContext()), S,
+ MCConstantExpr::create(Val, getContext()), S,
return MatchOperand_Success;
@@ -5179,7 +5179,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
if (CE) {
int32_t Val = CE->getValue();
if (isNegative && Val == 0)
- ImmVal = MCConstantExpr::Create(INT32_MIN, getContext());
+ ImmVal = MCConstantExpr::create(INT32_MIN, getContext());
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
@@ -5209,7 +5209,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
if (getParser().parseExpression(SubExprVal))
return true;
- const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
+ const MCExpr *ExprVal = ARMMCExpr::create(RefKind, SubExprVal,
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
@@ -5765,7 +5765,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Add the processor imod operand, if necessary.
if (ProcessorIMod) {
- MCConstantExpr::Create(ProcessorIMod, getContext()),
+ MCConstantExpr::create(ProcessorIMod, getContext()),
NameLoc, NameLoc));
} else if (Mnemonic == "cps" && isMClass()) {
return Error(NameLoc, "instruction 'cps' requires effect for M-class");
@@ -6752,13 +6752,13 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
MCSymbol *Dot = getContext().createTempSymbol();
const MCExpr *OpExpr = Inst.getOperand(2).getExpr();
- const MCExpr *InstPC = MCSymbolRefExpr::Create(Dot,
+ const MCExpr *InstPC = MCSymbolRefExpr::create(Dot,
- const MCExpr *Const8 = MCConstantExpr::Create(8, getContext());
- const MCExpr *ReadPC = MCBinaryExpr::CreateAdd(InstPC, Const8,
+ const MCExpr *Const8 = MCConstantExpr::create(8, getContext());
+ const MCExpr *ReadPC = MCBinaryExpr::createAdd(InstPC, Const8,
- const MCExpr *FixupAddr = MCBinaryExpr::CreateAdd(ReadPC, OpExpr,
+ const MCExpr *FixupAddr = MCBinaryExpr::createAdd(ReadPC, OpExpr,
@@ -9168,74 +9168,19 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
StringRef CPU = getParser().parseStringToEndOfStatement().trim();
getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU);
+ // FIXME: This is using table-gen data, but should be moved to
+ // ARMTargetParser once that is table-gen'd.
if (!STI.isCPUStringValid(CPU)) {
Error(L, "Unknown CPU name");
return false;
- // FIXME: This switches the CPU features globally, therefore it might
- // happen that code you would not expect to assemble will. For details
- // see:
STI.InitMCProcessorInfo(CPU, "");
return false;
-// FIXME: This is duplicated in getARMFPUFeatures() in
-// tools/clang/lib/Driver/Tools.cpp
-static const struct {
- const unsigned ID;
- const FeatureBitset Enabled;
- const FeatureBitset Disabled;
-} FPUs[] = {
- {/* ID */ ARM::FK_VFP,
- /* Enabled */ {ARM::FeatureVFP2},
- /* Disabled */ {ARM::FeatureNEON}},
- {/* ID */ ARM::FK_VFPV2,
- /* Enabled */ {ARM::FeatureVFP2},
- /* Disabled */ {ARM::FeatureNEON}},
- {/* ID */ ARM::FK_VFPV3,
- /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3},
- /* Disabled */ {ARM::FeatureNEON, ARM::FeatureD16}},
- {/* ID */ ARM::FK_VFPV3_D16,
- /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureD16},
- /* Disabled */ {ARM::FeatureNEON}},
- {/* ID */ ARM::FK_VFPV4,
- /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4},
- /* Disabled */ {ARM::FeatureNEON, ARM::FeatureD16}},
- {/* ID */ ARM::FK_VFPV4_D16,
- /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
- ARM::FeatureD16},
- /* Disabled */ {ARM::FeatureNEON}},
- {/* ID */ ARM::FK_FPV5_D16,
- /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
- ARM::FeatureFPARMv8, ARM::FeatureD16},
- /* Disabled */ {ARM::FeatureNEON, ARM::FeatureCrypto}},
- {/* ID */ ARM::FK_FP_ARMV8,
- /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
- ARM::FeatureFPARMv8},
- /* Disabled */ {ARM::FeatureNEON, ARM::FeatureCrypto, ARM::FeatureD16}},
- {/* ID */ ARM::FK_NEON,
- /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON},
- /* Disabled */ {ARM::FeatureD16}},
- {/* ID */ ARM::FK_NEON_VFPV4,
- /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
- ARM::FeatureNEON},
- /* Disabled */ {ARM::FeatureD16}},
- {/* ID */ ARM::FK_NEON_FP_ARMV8,
- /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
- ARM::FeatureFPARMv8, ARM::FeatureNEON},
- /* Disabled */ {ARM::FeatureCrypto, ARM::FeatureD16}},
- /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
- ARM::FeatureFPARMv8, ARM::FeatureNEON,
- ARM::FeatureCrypto},
- /* Disabled */ {ARM::FeatureD16}},
- {ARM::FK_SOFTVFP, {}, {}},
/// parseDirectiveFPU
/// ::= .fpu str
bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
@@ -9243,23 +9188,15 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
StringRef FPU = getParser().parseStringToEndOfStatement().trim();
unsigned ID = ARMTargetParser::parseFPU(FPU);
- if (ID == ARM::FK_INVALID) {
+ std::vector<const char *> Features;
+ if (!ARMTargetParser::getFPUFeatures(ID, Features)) {
Error(FPUNameLoc, "Unknown FPU name");
return false;
- for (const auto &Entry : FPUs) {
- if (Entry.ID != ID)
- continue;
- // Need to toggle features that should be on but are off and that
- // should off but are on.
- FeatureBitset Toggle = (Entry.Enabled & ~STI.getFeatureBits()) |
- (Entry.Disabled & STI.getFeatureBits());
- setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle)));
- break;
- }
+ for (auto Feature : Features)
+ STI.ApplyFeatureFlag(Feature);
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
return false;
@@ -9804,7 +9741,7 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
const MCSymbolRefExpr *SRE =
- MCSymbolRefExpr::Create(Parser.getTok().getIdentifier(),
+ MCSymbolRefExpr::create(Parser.getTok().getIdentifier(),
MCSymbolRefExpr::VK_ARM_TLSDESCSEQ, getContext());
@@ -9982,33 +9919,32 @@ extern "C" void LLVMInitializeARMAsmParser() {
#include ""
+// FIXME: This structure should be moved inside ARMTargetParser
+// when we start to table-generate them, and we can use the ARM
+// flags below, that were generated by table-gen.
static const struct {
- const char *Name;
+ const ARM::ArchExtKind Kind;
const unsigned ArchCheck;
const FeatureBitset Features;
} Extensions[] = {
- { "crc", Feature_HasV8, {ARM::FeatureCRC} },
- { "crypto", Feature_HasV8,
+ { ARM::AEK_CRC, Feature_HasV8, {ARM::FeatureCRC} },
+ { ARM::AEK_CRYPTO, Feature_HasV8,
{ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
- { "fp", Feature_HasV8, {ARM::FeatureFPARMv8} },
- { "idiv", Feature_HasV7 | Feature_IsNotMClass,
+ { ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} },
+ { ARM::AEK_HWDIV, Feature_HasV7 | Feature_IsNotMClass,
{ARM::FeatureHWDiv, ARM::FeatureHWDivARM} },
- // FIXME: iWMMXT not supported
- { "iwmmxt", Feature_None, {} },
- // FIXME: iWMMXT2 not supported
- { "iwmmxt2", Feature_None, {} },
- // FIXME: Maverick not supported
- { "maverick", Feature_None, {} },
- { "mp", Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
- // FIXME: ARMv6-m OS Extensions feature not checked
- { "os", Feature_None, {} },
+ { ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
+ { ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
// FIXME: Also available in ARMv6-K
- { "sec", Feature_HasV7, {ARM::FeatureTrustZone} },
- { "simd", Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
+ { ARM::AEK_SEC, Feature_HasV7, {ARM::FeatureTrustZone} },
// FIXME: Only available in A-class, isel not predicated
- { "virt", Feature_HasV7, {ARM::FeatureVirtualization} },
- // FIXME: xscale not supported
- { "xscale", Feature_None, {} },
+ { ARM::AEK_VIRT, Feature_HasV7, {ARM::FeatureVirtualization} },
+ // FIXME: Unsupported extensions.
+ { ARM::AEK_OS, Feature_None, {} },
+ { ARM::AEK_IWMMXT, Feature_None, {} },
+ { ARM::AEK_IWMMXT2, Feature_None, {} },
+ { ARM::AEK_MAVERICK, Feature_None, {} },
+ { ARM::AEK_XSCALE, Feature_None, {} },
/// parseDirectiveArchExtension
@@ -10031,9 +9967,12 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
EnableFeature = false;
Name = Name.substr(2);
+ unsigned FeatureKind = ARMTargetParser::parseArchExt(Name);
+ if (FeatureKind == ARM::AEK_INVALID)
+ Error(ExtLoc, "unknown architectural extension: " + Name);
for (const auto &Extension : Extensions) {
- if (Extension.Name != Name)
+ if (Extension.Kind != FeatureKind)
if (Extension.Features.none())
@@ -10080,7 +10019,7 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
if (Op.isImm()) {
const MCExpr *SOExpr = Op.getImm();
int64_t Value;
- if (!SOExpr->EvaluateAsAbsolute(Value))
+ if (!SOExpr->evaluateAsAbsolute(Value))
return Match_Success;
assert((Value >= INT32_MIN && Value <= UINT32_MAX) &&
"expression value must be representable in 32 bits");
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 2d36c30..0bff521 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -329,7 +329,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCExpr *Expr = Op.getExpr();
switch (Expr->getKind()) {
case MCExpr::Binary:
- O << '#' << *Expr;
+ O << '#';
+ Expr->print(O, &MAI);
case MCExpr::Constant: {
// If a symbolic branch target was added as a constant expression then
@@ -337,8 +338,9 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
// address.
const MCConstantExpr *Constant = cast<MCConstantExpr>(Expr);
int64_t TargetAddress;
- if (!Constant->EvaluateAsAbsolute(TargetAddress)) {
- O << '#' << *Expr;
+ if (!Constant->evaluateAsAbsolute(TargetAddress)) {
+ O << '#';
+ Expr->print(O, &MAI);
} else {
O << "0x";
@@ -348,7 +350,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
// FIXME: Should we always treat this as if it is a constant literal and
// prefix it with '#'?
- O << *Expr;
+ Expr->print(O, &MAI);
@@ -359,7 +361,7 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
if (MO1.isExpr()) {
- O << *MO1.getExpr();
+ MO1.getExpr()->print(O, &MAI);
@@ -1055,7 +1057,7 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO = MI->getOperand(OpNum);
if (MO.isExpr()) {
- O << *MO.getExpr();
+ MO.getExpr()->print(O, &MAI);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index f0eed9b..b03cada 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -622,8 +622,6 @@ namespace ARM_AM {
return Value;
- AMSubMode getLoadStoreMultipleSubMode(int Opcode);
// Floating-point Immediates
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 6c1f789..be23e90 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -260,9 +260,9 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
hasNOP() ? Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding;
uint64_t NumNops = Count / 2;
for (uint64_t i = 0; i != NumNops; ++i)
- OW->Write16(nopEncoding);
+ OW->write16(nopEncoding);
if (Count & 1)
- OW->Write8(0);
+ OW->write8(0);
return true;
// ARM mode
@@ -270,21 +270,21 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
hasNOP() ? ARMv6T2_NopEncoding : ARMv4_NopEncoding;
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
- OW->Write32(nopEncoding);
+ OW->write32(nopEncoding);
// FIXME: should this function return false when unable to write exactly
// 'Count' bytes with NOP encodings?
switch (Count % 4) {
break; // No leftover bytes to write
case 1:
- OW->Write8(0);
+ OW->write8(0);
case 2:
- OW->Write16(0);
+ OW->write16(0);
case 3:
- OW->Write16(0);
- OW->Write8(0xa0);
+ OW->write16(0);
+ OW->write8(0xa0);
@@ -601,8 +601,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// the offset when the destination has the same MCFragment.
if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
const MCSymbol &Sym = A->getSymbol();
- const MCSymbolData &SymData = Asm.getSymbolData(Sym);
- IsResolved = (SymData.getFragment() == DF);
+ IsResolved = (Sym.getFragment() == DF);
// We must always generate a relocation for BL/BLX instructions if we have
// a symbol to reference, as the linker relies on knowing the destination
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index f4fedee..804d353 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -37,7 +37,7 @@ namespace {
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
- bool needsRelocateWithSymbol(const MCSymbolData &SD,
+ bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
@@ -49,7 +49,7 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
ARMELFObjectWriter::~ARMELFObjectWriter() {}
-bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
+bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const {
// FIXME: This is extremely conservative. This really needs to use a
// whitelist with a clear explanation for why each realocation needs to
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 0eb5a81..6e3af73 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -22,9 +22,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFStreamer.h"
-#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
@@ -34,7 +32,7 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
@@ -216,7 +214,13 @@ ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
void ARMTargetAsmStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
- OS << "\t.thumb_set\t" << *Symbol << ", " << *Value << '\n';
+ const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
+ OS << "\t.thumb_set\t";
+ Symbol->print(OS, MAI);
+ OS << ", ";
+ Value->print(OS, MAI);
+ OS << '\n';
void ARMTargetAsmStreamer::emitInst(uint32_t Inst, char Suffix) {
@@ -562,17 +566,16 @@ private:
MCSymbol *Start = getContext().createTempSymbol();
- MCSymbol *Symbol =
- getContext().getOrCreateSymbol(Name + "." +
- Twine(MappingSymbolCounter++));
+ auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
+ Name + "." + Twine(MappingSymbolCounter++)));
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- MCELF::SetBinding(SD, ELF::STB_LOCAL);
- SD.setExternal(false);
+ getAssembler().registerSymbol(*Symbol);
+ Symbol->setType(ELF::STT_NOTYPE);
+ Symbol->setBinding(ELF::STB_LOCAL);
+ Symbol->setExternal(false);
AssignSection(Symbol, getCurrentSection().first);
- const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
+ const MCExpr *Value = MCSymbolRefExpr::create(Start, getContext());
@@ -688,16 +691,16 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
using namespace ARMBuildAttrs;
- ARMTargetParser::getArchDefaultCPUName(Arch),
+ ARMTargetParser::getCPUAttr(Arch),
if (EmittedArch == ARM::AK_INVALID)
- ARMTargetParser::getArchDefaultCPUArch(Arch),
+ ARMTargetParser::getArchAttr(Arch),
- ARMTargetParser::getArchDefaultCPUArch(EmittedArch),
+ ARMTargetParser::getArchAttr(EmittedArch),
switch (Arch) {
@@ -813,6 +816,9 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
/* OverwriteExisting= */ false);
+ // ABI_HardFP_use is handled in ARMAsmPrinter, so _SP_D16 is treated the same
+ // as _D16 here.
+ case ARM::FK_FPV4_SP_D16:
case ARM::FK_VFPV4_D16:
@@ -827,6 +833,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
// FPV5_D16 is identical to FP_ARMV8 except for the number of D registers, so
// uses the FP_ARMV8_D16 build attribute.
+ case ARM::FK_FPV5_SP_D16:
case ARM::FK_FPV5_D16:
@@ -861,6 +868,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
+ case ARM::FK_NONE:
@@ -972,9 +980,9 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
if (!Streamer.IsThumb)
- const MCSymbolData &SD = Streamer.getOrCreateSymbolData(Symbol);
- unsigned Type = MCELF::GetType(SD);
- if (Type == ELF_STT_Func || Type == ELF_STT_GnuIFunc)
+ Streamer.getAssembler().registerSymbol(*Symbol);
+ unsigned Type = cast<MCSymbolELF>(Symbol)->getType();
+ if (Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)
@@ -1024,7 +1032,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
// Get .ARM.extab or .ARM.exidx section
- const MCSymbol *Group = FnSection.getGroup();
+ const MCSymbolELF *Group = FnSection.getGroup();
if (Group)
Flags |= ELF::SHF_GROUP;
MCSectionELF *EHSection =
@@ -1095,7 +1103,7 @@ void ARMELFStreamer::emitFnEnd() {
const MCSymbolRefExpr *FnStartRef =
- MCSymbolRefExpr::Create(FnStart,
+ MCSymbolRefExpr::create(FnStart,
@@ -1106,7 +1114,7 @@ void ARMELFStreamer::emitFnEnd() {
} else if (ExTab) {
// Emit a reference to the unwind opcodes in the ".ARM.extab" section.
const MCSymbolRefExpr *ExTabEntryRef =
- MCSymbolRefExpr::Create(ExTab,
+ MCSymbolRefExpr::create(ExTab,
EmitValue(ExTabEntryRef, 4);
@@ -1138,7 +1146,7 @@ void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; }
void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
const MCSymbol *PersonalitySym = getContext().getOrCreateSymbol(Name);
- const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::Create(
+ const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::create(
PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext());
@@ -1186,7 +1194,7 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) {
// Emit personality
if (Personality) {
const MCSymbolRefExpr *PersonalityRef =
- MCSymbolRefExpr::Create(Personality,
+ MCSymbolRefExpr::create(Personality,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index caa8736..1ac0815 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -19,8 +19,7 @@ using namespace llvm;
void ARMMCAsmInfoDarwin::anchor() { }
-ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(StringRef TT) {
- Triple TheTriple(TT);
+ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) {
if ((TheTriple.getArch() == Triple::armeb) ||
(TheTriple.getArch() == Triple::thumbeb))
IsLittleEndian = false;
@@ -41,8 +40,7 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(StringRef TT) {
void ARMELFMCAsmInfo::anchor() { }
-ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) {
- Triple TheTriple(TT);
+ARMELFMCAsmInfo::ARMELFMCAsmInfo(const Triple &TheTriple) {
if ((TheTriple.getArch() == Triple::armeb) ||
(TheTriple.getArch() == Triple::thumbeb))
IsLittleEndian = false;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index 6cb4715..99a5fff 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -19,18 +19,19 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
+ class Triple;
class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
virtual void anchor();
- explicit ARMMCAsmInfoDarwin(StringRef TT);
+ explicit ARMMCAsmInfoDarwin(const Triple &TheTriple);
class ARMELFMCAsmInfo : public MCAsmInfoELF {
void anchor() override;
- explicit ARMELFMCAsmInfo(StringRef TT);
+ explicit ARMELFMCAsmInfo(const Triple &TT);
void setUseIntegratedAssembler(bool Value) override;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 5b90de3..2063ca6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -16,12 +16,12 @@ using namespace llvm;
#define DEBUG_TYPE "armmcexpr"
const ARMMCExpr*
-ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ARMMCExpr::create(VariantKind Kind, const MCExpr *Expr,
MCContext &Ctx) {
return new (Ctx) ARMMCExpr(Kind, Expr);
-void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
+void ARMMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
switch (Kind) {
default: llvm_unreachable("Invalid kind!");
case VK_ARM_HI16: OS << ":upper16:"; break;
@@ -31,7 +31,7 @@ void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
const MCExpr *Expr = getSubExpr();
if (Expr->getKind() != MCExpr::SymbolRef)
OS << '(';
- Expr->print(OS);
+ Expr->print(OS, MAI);
if (Expr->getKind() != MCExpr::SymbolRef)
OS << ')';
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index a52abe7..9146d4d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -33,15 +33,15 @@ public:
/// @name Construction
/// @{
- static const ARMMCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ static const ARMMCExpr *create(VariantKind Kind, const MCExpr *Expr,
MCContext &Ctx);
- static const ARMMCExpr *CreateUpper16(const MCExpr *Expr, MCContext &Ctx) {
- return Create(VK_ARM_HI16, Expr, Ctx);
+ static const ARMMCExpr *createUpper16(const MCExpr *Expr, MCContext &Ctx) {
+ return create(VK_ARM_HI16, Expr, Ctx);
- static const ARMMCExpr *CreateLower16(const MCExpr *Expr, MCContext &Ctx) {
- return Create(VK_ARM_LO16, Expr, Ctx);
+ static const ARMMCExpr *createLower16(const MCExpr *Expr, MCContext &Ctx) {
+ return create(VK_ARM_LO16, Expr, Ctx);
/// @}
@@ -56,15 +56,15 @@ public:
/// @}
- void PrintImpl(raw_ostream &OS) const override;
- bool EvaluateAsRelocatableImpl(MCValue &Res,
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override {
return false;
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *FindAssociatedSection() const override {
- return getSubExpr()->FindAssociatedSection();
+ MCSection *findAssociatedSection() const override {
+ return getSubExpr()->findAssociatedSection();
// There are no TLS ARMMCExprs at the moment.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 30deba9..92c4d6a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -277,18 +277,17 @@ static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {
return X;
-static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
- Triple TheTriple(TT);
+static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI,
+ const Triple &TheTriple) {
MCAsmInfo *MAI;
if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO())
- MAI = new ARMMCAsmInfoDarwin(TT);
+ MAI = new ARMMCAsmInfoDarwin(TheTriple);
else if (TheTriple.isWindowsItaniumEnvironment())
else if (TheTriple.isWindowsMSVCEnvironment())
MAI = new ARMCOFFMCAsmInfoMicrosoft();
- MAI = new ARMELFMCAsmInfo(TT);
+ MAI = new ARMELFMCAsmInfo(TheTriple);
unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true);
MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0));
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
index d4b00e6..4468132 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
@@ -26,9 +26,9 @@ public:
unsigned VariantKind) override {
switch(VariantKind) {
case LLVMDisassembler_VariantKind_ARM_HI16:
- return ARMMCExpr::CreateUpper16(SubExpr, Ctx);
+ return ARMMCExpr::createUpper16(SubExpr, Ctx);
case LLVMDisassembler_VariantKind_ARM_LO16:
- return ARMMCExpr::CreateLower16(SubExpr, Ctx);
+ return ARMMCExpr::createLower16(SubExpr, Ctx);
return MCRelocationInfo::createExprForCAPIVariantKind(SubExpr,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 9755330..95d7ea7 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -17,7 +17,6 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
-#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCValue.h"
@@ -49,12 +48,10 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
const MCSymbol &S, uint64_t FixedValue);
- ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
- uint32_t CPUSubtype)
- : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
- /*UseAggressiveSymbolFolding=*/true) {}
+ ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
- void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+ void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
@@ -152,23 +149,21 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
- if (!A_SD->getFragment())
+ if (!A->getFragment())
"symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
uint32_t Value = Writer->getSymbolAddress(*A, Layout);
uint32_t Value2 = 0;
- uint64_t SecAddr =
- Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
FixedValue += SecAddr;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
- const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+ const MCSymbol *SB = &B->getSymbol();
- if (!B_SD->getFragment())
+ if (!SB->getFragment())
"symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
@@ -176,7 +171,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
// Select the appropriate difference relocation type.
Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
- FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
// Relocations are written out in reverse order, so the PAIR comes first.
@@ -255,24 +250,22 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
- if (!A_SD->getFragment())
+ if (!A->getFragment())
"symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
uint32_t Value = Writer->getSymbolAddress(*A, Layout);
- uint64_t SecAddr =
- Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
FixedValue += SecAddr;
uint32_t Value2 = 0;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols");
- const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+ const MCSymbol *SB = &B->getSymbol();
- if (!B_SD->getFragment())
+ if (!SB->getFragment())
"symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
@@ -280,7 +273,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
// Select the appropriate difference relocation type.
Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
- FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
// Relocations are written out in reverse order, so the PAIR comes first.
@@ -344,7 +337,7 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
return false;
-void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
+void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer,
MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -405,7 +398,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// Resolve constant variables.
if (A->isVariable()) {
int64_t Res;
- if (A->getVariableValue()->EvaluateAsAbsolute(
+ if (A->getVariableValue()->evaluateAsAbsolute(
Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index b62ae2e..68736bc 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -94,12 +94,12 @@ static void TrackDefUses(MachineInstr *MI,
/// conservatively remove more kill flags than are necessary, but removing them
/// is safer than incorrect kill flags remaining on instructions.
static void ClearKillFlags(MachineInstr *MI, SmallSet<unsigned, 4> &Uses) {
- for (MIOperands MO(MI); MO.isValid(); ++MO) {
- if (!MO->isReg() || MO->isDef() || !MO->isKill())
+ for (MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.isDef() || !MO.isKill())
- if (!Uses.count(MO->getReg()))
+ if (!Uses.count(MO.getReg()))
- MO->setIsKill(false);
+ MO.setIsKill(false);
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 0ab1ff9..d9ab824 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -133,7 +133,7 @@ namespace {
class Thumb2SizeReduce : public MachineFunctionPass {
static char ID;
- Thumb2SizeReduce();
+ Thumb2SizeReduce(std::function<bool(const Function &)> Ftor);
const Thumb2InstrInfo *TII;
const ARMSubtarget *STI;
@@ -198,11 +198,14 @@ namespace {
SmallVector<MBBInfo, 8> BlockInfo;
+ std::function<bool(const Function &)> PredicateFtor;
char Thumb2SizeReduce::ID = 0;
-Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
+Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
+ : MachineFunctionPass(ID), PredicateFtor(Ftor) {
OptimizeSize = MinimizeSize = false;
for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
unsigned FromOpc = ReduceTable[i].WideOpc;
@@ -1000,6 +1003,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
+ if (PredicateFtor && !PredicateFtor(*MF.getFunction()))
+ return false;
STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
if (STI->isThumb1Only() || STI->prefers32BitThumb())
return false;
@@ -1025,6 +1031,7 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
/// reduction pass.
-FunctionPass *llvm::createThumb2SizeReductionPass() {
- return new Thumb2SizeReduce();
+FunctionPass *llvm::createThumb2SizeReductionPass(
+ std::function<bool(const Function &)> Ftor) {
+ return new Thumb2SizeReduce(Ftor);
diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp
index 3237596..10ec658 100644
--- a/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -83,5 +83,7 @@ void BPFAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Force static initialization.
extern "C" void LLVMInitializeBPFAsmPrinter() {
- RegisterAsmPrinter<BPFAsmPrinter> X(TheBPFTarget);
+ RegisterAsmPrinter<BPFAsmPrinter> X(TheBPFleTarget);
+ RegisterAsmPrinter<BPFAsmPrinter> Y(TheBPFbeTarget);
+ RegisterAsmPrinter<BPFAsmPrinter> Z(TheBPFTarget);
diff --git a/lib/Target/BPF/BPFMCInstLower.cpp b/lib/Target/BPF/BPFMCInstLower.cpp
index d608afb..00bd8d9 100644
--- a/lib/Target/BPF/BPFMCInstLower.cpp
+++ b/lib/Target/BPF/BPFMCInstLower.cpp
@@ -33,7 +33,7 @@ BPFMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
MCOperand BPFMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MCSymbol *Sym) const {
- const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
if (!MO.isJTI() && MO.getOffset())
llvm_unreachable("unknown symbol op");
@@ -63,7 +63,7 @@ void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_MachineBasicBlock:
MCOp = MCOperand::createExpr(
- MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx));
+ MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
case MachineOperand::MO_RegisterMask:
diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp
index 9487427..3329d5f 100644
--- a/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/lib/Target/BPF/BPFTargetMachine.cpp
@@ -23,19 +23,24 @@ using namespace llvm;
extern "C" void LLVMInitializeBPFTarget() {
// Register the target.
- RegisterTargetMachine<BPFTargetMachine> X(TheBPFTarget);
+ RegisterTargetMachine<BPFTargetMachine> X(TheBPFleTarget);
+ RegisterTargetMachine<BPFTargetMachine> Y(TheBPFbeTarget);
+ RegisterTargetMachine<BPFTargetMachine> Z(TheBPFTarget);
+// DataLayout: little or big endian
+static std::string computeDataLayout(StringRef TT) {
+ if (Triple(TT).getArch() == Triple::bpfeb)
+ return "E-m:e-p:64:64-i64:64-n32:64-S128";
+ else
+ return "e-m:e-p:64:64-i64:64-n32:64-S128";
-// DataLayout --> Little-endian, 64-bit pointer/ABI/alignment
-// The stack is always 8 byte aligned
-// On function prologue, the stack is created by decrementing
-// its pointer. Once decremented, all references are done with positive
-// offset from the stack/frame pointer.
BPFTargetMachine::BPFTargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, "e-m:e-p:64:64-i64:64-n32:64-S128", TT, CPU, FS,
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS,
Options, RM, CM, OL),
Subtarget(TT, CPU, FS, *this) {
diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 48f34e4..7b1d925 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -25,7 +25,10 @@ using namespace llvm;
namespace {
class BPFAsmBackend : public MCAsmBackend {
- BPFAsmBackend() : MCAsmBackend() {}
+ bool IsLittleEndian;
+ BPFAsmBackend(bool IsLittleEndian)
+ : MCAsmBackend(), IsLittleEndian(IsLittleEndian) {}
~BPFAsmBackend() override {}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
@@ -54,7 +57,7 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return false;
for (uint64_t i = 0; i < Count; i += 8)
- OW->Write64(0x15000000);
+ OW->write64(0x15000000);
return true;
@@ -69,17 +72,28 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
assert(Fixup.getKind() == FK_PCRel_2);
Value = (uint16_t)((Value - 8) / 8);
- Data[Fixup.getOffset() + 2] = Value & 0xFF;
- Data[Fixup.getOffset() + 3] = Value >> 8;
+ if (IsLittleEndian) {
+ Data[Fixup.getOffset() + 2] = Value & 0xFF;
+ Data[Fixup.getOffset() + 3] = Value >> 8;
+ } else {
+ Data[Fixup.getOffset() + 2] = Value >> 8;
+ Data[Fixup.getOffset() + 3] = Value & 0xFF;
+ }
MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
- return createBPFELFObjectWriter(OS, 0);
+ return createBPFELFObjectWriter(OS, 0, IsLittleEndian);
MCAsmBackend *llvm::createBPFAsmBackend(const Target &T,
const MCRegisterInfo &MRI, StringRef TT,
StringRef CPU) {
- return new BPFAsmBackend();
+ return new BPFAsmBackend(/*IsLittleEndian=*/true);
+MCAsmBackend *llvm::createBPFbeAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI, StringRef TT,
+ StringRef CPU) {
+ return new BPFAsmBackend(/*IsLittleEndian=*/false);
diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index a5562c1..05ba618 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -47,7 +47,8 @@ unsigned BPFELFObjectWriter::GetRelocType(const MCValue &Target,
-MCObjectWriter *llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) {
+MCObjectWriter *llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI, bool IsLittleEndian) {
MCELFObjectTargetWriter *MOTW = new BPFELFObjectWriter(OSABI);
- return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+ return createELFObjectWriter(MOTW, OS, IsLittleEndian);
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index ab61ae7..d63bbf4 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -16,13 +16,18 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
namespace llvm {
class Target;
+class Triple;
class BPFMCAsmInfo : public MCAsmInfo {
- explicit BPFMCAsmInfo(StringRef TT) {
+ explicit BPFMCAsmInfo(const Triple &TT) {
+ if (TT.getArch() == Triple::bpfeb)
+ IsLittleEndian = false;
PrivateGlobalPrefix = ".L";
WeakRefDirective = "\t.weak\t";
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index ba8a874..dc4ede3 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -30,9 +30,11 @@ class BPFMCCodeEmitter : public MCCodeEmitter {
BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete;
void operator=(const BPFMCCodeEmitter &) = delete;
const MCRegisterInfo &MRI;
+ bool IsLittleEndian;
- BPFMCCodeEmitter(const MCRegisterInfo &mri) : MRI(mri) {}
+ BPFMCCodeEmitter(const MCRegisterInfo &mri, bool IsLittleEndian)
+ : MRI(mri), IsLittleEndian(IsLittleEndian) {}
~BPFMCCodeEmitter() {}
@@ -61,7 +63,13 @@ public:
MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx) {
- return new BPFMCCodeEmitter(MRI);
+ return new BPFMCCodeEmitter(MRI, true);
+MCCodeEmitter *llvm::createBPFbeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new BPFMCCodeEmitter(MRI, false);
unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI,
@@ -91,59 +99,53 @@ unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI,
return 0;
-// Emit one byte through output stream
-void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) {
- OS << (char)C;
- ++CurByte;
-// Emit a series of bytes (little endian)
-void EmitLEConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
- raw_ostream &OS) {
- assert(Size <= 8 && "size too big in emit constant");
- for (unsigned i = 0; i != Size; ++i) {
- EmitByte(Val & 255, CurByte, OS);
- Val >>= 8;
- }
-// Emit a series of bytes (big endian)
-void EmitBEConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
- raw_ostream &OS) {
- assert(Size <= 8 && "size too big in emit constant");
- for (int i = (Size - 1) * 8; i >= 0; i -= 8)
- EmitByte((Val >> i) & 255, CurByte, OS);
+static uint8_t SwapBits(uint8_t Val) // Despite the name, swaps the high and low nibbles (4-bit halves) of Val, not individual bits.
+ return (Val & 0x0F) << 4 | (Val & 0xF0) >> 4;
void BPFMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
unsigned Opcode = MI.getOpcode();
- // Keep track of the current byte being emitted
- unsigned CurByte = 0;
+ support::endian::Writer<support::little> LE(OS);
+ support::endian::Writer<support::big> BE(OS);
if (Opcode == BPF::LD_imm64 || Opcode == BPF::LD_pseudo) {
uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
- EmitByte(Value >> 56, CurByte, OS);
- EmitByte(((Value >> 48) & 0xff), CurByte, OS);
- EmitLEConstant(0, 2, CurByte, OS);
- EmitLEConstant(Value & 0xffffFFFF, 4, CurByte, OS);
+ LE.write<uint8_t>(Value >> 56);
+ if (IsLittleEndian)
+ LE.write<uint8_t>((Value >> 48) & 0xff);
+ else
+ LE.write<uint8_t>(SwapBits((Value >> 48) & 0xff));
+ LE.write<uint16_t>(0);
+ if (IsLittleEndian)
+ LE.write<uint32_t>(Value & 0xffffFFFF);
+ else
+ BE.write<uint32_t>(Value & 0xffffFFFF);
const MCOperand &MO = MI.getOperand(1);
uint64_t Imm = MO.isImm() ? MO.getImm() : 0;
- EmitByte(0, CurByte, OS);
- EmitByte(0, CurByte, OS);
- EmitLEConstant(0, 2, CurByte, OS);
- EmitLEConstant(Imm >> 32, 4, CurByte, OS);
+ LE.write<uint8_t>(0);
+ LE.write<uint8_t>(0);
+ LE.write<uint16_t>(0);
+ if (IsLittleEndian)
+ LE.write<uint32_t>(Imm >> 32);
+ else
+ BE.write<uint32_t>(Imm >> 32);
} else {
// Get instruction encoding and emit it
uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
- EmitByte(Value >> 56, CurByte, OS);
- EmitByte((Value >> 48) & 0xff, CurByte, OS);
- EmitLEConstant((Value >> 32) & 0xffff, 2, CurByte, OS);
- EmitLEConstant(Value & 0xffffFFFF, 4, CurByte, OS);
+ LE.write<uint8_t>(Value >> 56);
+ if (IsLittleEndian) {
+ LE.write<uint8_t>((Value >> 48) & 0xff);
+ LE.write<uint16_t>((Value >> 32) & 0xffff);
+ LE.write<uint32_t>(Value & 0xffffFFFF);
+ } else {
+ LE.write<uint8_t>(SwapBits((Value >> 48) & 0xff));
+ BE.write<uint16_t>((Value >> 32) & 0xffff);
+ BE.write<uint32_t>(Value & 0xffffFFFF);
+ }
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index c4cf4b8..7cedba9 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -79,32 +79,43 @@ static MCInstPrinter *createBPFMCInstPrinter(const Triple &T,
extern "C" void LLVMInitializeBPFTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<BPFMCAsmInfo> X(TheBPFTarget);
+ for (Target *T : {&TheBPFleTarget, &TheBPFbeTarget, &TheBPFTarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<BPFMCAsmInfo> X(*T);
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheBPFTarget, createBPFMCCodeGenInfo);
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createBPFMCCodeGenInfo);
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheBPFTarget, createBPFMCInstrInfo);
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createBPFMCInstrInfo);
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheBPFTarget, createBPFMCRegisterInfo);
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createBPFMCRegisterInfo);
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheBPFTarget,
- createBPFMCSubtargetInfo);
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T,
+ createBPFMCSubtargetInfo);
- // Register the MC code emitter
- TargetRegistry::RegisterMCCodeEmitter(TheBPFTarget,
- llvm::createBPFMCCodeEmitter);
+ // Register the object streamer
+ TargetRegistry::RegisterELFStreamer(*T, createBPFMCStreamer);
- // Register the ASM Backend
- TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFAsmBackend);
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createBPFMCInstPrinter);
+ }
- // Register the object streamer
- TargetRegistry::RegisterELFStreamer(TheBPFTarget, createBPFMCStreamer);
+ // Register the MC code emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheBPFleTarget, createBPFMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheBPFbeTarget, createBPFbeMCCodeEmitter);
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheBPFTarget, createBPFMCInstPrinter);
+ // Register the ASM Backend
+ TargetRegistry::RegisterMCAsmBackend(TheBPFleTarget, createBPFAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheBPFbeTarget, createBPFbeAsmBackend);
+ if (sys::IsLittleEndianHost) {
+ TargetRegistry::RegisterMCCodeEmitter(TheBPFTarget, createBPFMCCodeEmitter);
+ TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFAsmBackend);
+ } else {
+ TargetRegistry::RegisterMCCodeEmitter(TheBPFTarget, createBPFbeMCCodeEmitter);
+ TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFbeAsmBackend);
+ }
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index ce08b7c..a9ba7d9 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -30,16 +30,24 @@ class StringRef;
class raw_ostream;
class raw_pwrite_stream;
+extern Target TheBPFleTarget;
+extern Target TheBPFbeTarget;
extern Target TheBPFTarget;
MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
+MCCodeEmitter *createBPFbeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
+MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
-MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
+MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI, bool IsLittleEndian);
// Defines symbolic names for BPF registers. This defines a mapping from
diff --git a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
index 87716e6..a16dbae 100644
--- a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
+++ b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
@@ -11,8 +11,18 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-Target llvm::TheBPFTarget;
+namespace llvm {
+Target TheBPFleTarget;
+Target TheBPFbeTarget;
+Target TheBPFTarget;
extern "C" void LLVMInitializeBPFTargetInfo() {
- RegisterTarget<Triple::bpf, /*HasJIT=*/true> X(TheBPFTarget, "bpf", "BPF");
+ TargetRegistry::RegisterTarget(TheBPFTarget, "bpf",
+ "BPF (host endian)",
+ [](Triple::ArchType) { return false; }, true);
+ RegisterTarget<Triple::bpfel, /*HasJIT=*/true> X(
+ TheBPFleTarget, "bpfel", "BPF (little endian)");
+ RegisterTarget<Triple::bpfeb, /*HasJIT=*/true> Y(
+ TheBPFbeTarget, "bpfeb", "BPF (big endian)");
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 1805437..e6d0199 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMTarget
+ TargetRecip.cpp
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index f1a7127..b837798 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -645,8 +645,7 @@ void CppWriter::printType(Type* Ty) {
if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
std::string elemName(getCppName(ET));
Out << "ArrayType* " << typeName << " = ArrayType::get("
- << elemName
- << ", " << utostr(AT->getNumElements()) << ");";
+ << elemName << ", " << AT->getNumElements() << ");";
@@ -658,8 +657,7 @@ void CppWriter::printType(Type* Ty) {
if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
std::string elemName(getCppName(ET));
Out << "PointerType* " << typeName << " = PointerType::get("
- << elemName
- << ", " << utostr(PT->getAddressSpace()) << ");";
+ << elemName << ", " << PT->getAddressSpace() << ");";
@@ -671,8 +669,7 @@ void CppWriter::printType(Type* Ty) {
if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
std::string elemName(getCppName(ET));
Out << "VectorType* " << typeName << " = VectorType::get("
- << elemName
- << ", " << utostr(PT->getNumElements()) << ");";
+ << elemName << ", " << PT->getNumElements() << ");";
@@ -1029,7 +1026,7 @@ void CppWriter::printVariableHead(const GlobalVariable *GV) {
if (GV->getAlignment()) {
- Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");";
+ Out << "->setAlignment(" << GV->getAlignment() << ");";
if (GV->getVisibility() != GlobalValue::DefaultVisibility) {
diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index a60d1e4..14f9d77 100644
--- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -7,9 +7,11 @@
+#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
@@ -27,6 +29,7 @@
#include <vector>
using namespace llvm;
+using namespace Hexagon;
#define DEBUG_TYPE "hexagon-disassembler"
@@ -37,9 +40,14 @@ namespace {
/// \brief Hexagon disassembler for all Hexagon platforms.
class HexagonDisassembler : public MCDisassembler {
+ std::unique_ptr<MCInst *> CurrentBundle;
HexagonDisassembler(MCSubtargetInfo const &STI, MCContext &Ctx)
- : MCDisassembler(STI, Ctx) {}
+ : MCDisassembler(STI, Ctx), CurrentBundle(new MCInst *) {}
+ DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream, raw_ostream &CStream,
+ bool &Complete) const;
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
@@ -48,37 +56,43 @@ public:
static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, void const *Decoder);
+ uint64_t Address,
+ void const *Decoder);
+static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op,
+ raw_ostream &os);
+static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst);
static const uint16_t IntRegDecoderTable[] = {
- Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
- Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
- Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14,
- Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
- Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24,
- Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29,
- Hexagon::R30, Hexagon::R31 };
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
+ Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14,
+ Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+ Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24,
+ Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29,
+ Hexagon::R30, Hexagon::R31};
-static const uint16_t PredRegDecoderTable[] = { Hexagon::P0, Hexagon::P1,
-Hexagon::P2, Hexagon::P3 };
+static const uint16_t PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1,
+ Hexagon::P2, Hexagon::P3};
static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
- const uint16_t Table[], size_t Size) {
+ const uint16_t Table[], size_t Size) {
if (RegNo < Size) {
return MCDisassembler::Success;
- }
- else
+ } else
return MCDisassembler::Fail;
static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- void const *Decoder) {
+ uint64_t /*Address*/,
+ void const *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -88,13 +102,13 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/, const void *Decoder) {
+ uint64_t /*Address*/,
+ const void *Decoder) {
static const uint16_t CtrlRegDecoderTable[] = {
- Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1,
- Hexagon::P3_0, Hexagon::NoRegister, Hexagon::C6, Hexagon::C7,
- Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP,
- Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPCH
- };
+ Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1,
+ Hexagon::P3_0, Hexagon::NoRegister, Hexagon::C6, Hexagon::C7,
+ Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP,
+ Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPCH};
if (RegNo >= sizeof(CtrlRegDecoderTable) / sizeof(CtrlRegDecoderTable[0]))
return MCDisassembler::Fail;
@@ -108,17 +122,15 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/, void const *Decoder) {
+ uint64_t /*Address*/,
+ void const *Decoder) {
static const uint16_t CtrlReg64DecoderTable[] = {
- Hexagon::C1_0, Hexagon::NoRegister,
- Hexagon::C3_2, Hexagon::NoRegister,
- Hexagon::NoRegister, Hexagon::NoRegister,
- Hexagon::C7_6, Hexagon::NoRegister,
- Hexagon::C9_8, Hexagon::NoRegister,
- Hexagon::C11_10, Hexagon::NoRegister,
- Hexagon::CS, Hexagon::NoRegister,
- Hexagon::UPC, Hexagon::NoRegister
- };
+ Hexagon::C1_0, Hexagon::NoRegister, Hexagon::C3_2,
+ Hexagon::NoRegister, Hexagon::NoRegister, Hexagon::NoRegister,
+ Hexagon::C7_6, Hexagon::NoRegister, Hexagon::C9_8,
+ Hexagon::NoRegister, Hexagon::C11_10, Hexagon::NoRegister,
+ Hexagon::CS, Hexagon::NoRegister, Hexagon::UPC,
+ Hexagon::NoRegister};
if (RegNo >= sizeof(CtrlReg64DecoderTable) / sizeof(CtrlReg64DecoderTable[0]))
return MCDisassembler::Fail;
@@ -132,7 +144,8 @@ static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/, const void *Decoder) {
+ uint64_t /*Address*/,
+ const void *Decoder) {
unsigned Register = 0;
switch (RegNo) {
case 0:
@@ -149,22 +162,21 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/, const void *Decoder) {
+ uint64_t /*Address*/,
+ const void *Decoder) {
static const uint16_t DoubleRegDecoderTable[] = {
- Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3,
- Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7,
- Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11,
- Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15
- };
+ Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3,
+ Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7,
+ Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11,
+ Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15};
- return (DecodeRegisterClass(Inst, RegNo >> 1,
- DoubleRegDecoderTable,
- sizeof (DoubleRegDecoderTable)));
+ return (DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable,
+ sizeof(DoubleRegDecoderTable)));
static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- void const *Decoder) {
+ uint64_t /*Address*/,
+ void const *Decoder) {
if (RegNo > 3)
return MCDisassembler::Fail;
@@ -191,17 +203,687 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
uint64_t Address,
raw_ostream &os,
raw_ostream &cs) const {
- Size = 4;
- if (Bytes.size() < 4)
- return MCDisassembler::Fail;
+ DecodeStatus Result = DecodeStatus::Success;
+ bool Complete = false;
+ Size = 0;
+ *CurrentBundle = &MI;
+ MI.setOpcode(Hexagon::BUNDLE);
+ MI.addOperand(MCOperand::createImm(0));
+ while (Result == Success && Complete == false) {
+ if (Bytes.size() < HEXAGON_INSTR_SIZE)
+ return MCDisassembler::Fail;
+ MCInst *Inst = new (getContext()) MCInst;
+ Result = getSingleInstruction(*Inst, MI, Bytes, Address, os, cs, Complete);
+ MI.addOperand(MCOperand::createInst(Inst));
+ Bytes = Bytes.slice(HEXAGON_INSTR_SIZE);
+ }
+ return Result;
+DecodeStatus HexagonDisassembler::getSingleInstruction(
+ MCInst &MI, MCInst &MCB, ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &os, raw_ostream &cs, bool &Complete) const {
+ assert(Bytes.size() >= HEXAGON_INSTR_SIZE);
- uint32_t insn =
+ uint32_t Instruction =
llvm::support::endian::read<uint32_t, llvm::support::little,
- // Remove parse bits.
- insn &= ~static_cast<uint32_t>(HexagonII::InstParseBits::INST_PARSE_MASK);
- DecodeStatus Result = decodeInstruction(DecoderTable32, MI, insn, Address, this, STI);
- HexagonMCInstrInfo::AppendImplicitOperands(MI);
+ auto BundleSize = HexagonMCInstrInfo::bundleSize(MCB);
+ if ((Instruction & HexagonII::INST_PARSE_MASK) ==
+ if (BundleSize == 0)
+ HexagonMCInstrInfo::setInnerLoop(MCB);
+ else if (BundleSize == 1)
+ HexagonMCInstrInfo::setOuterLoop(MCB);
+ else
+ return DecodeStatus::Fail;
+ }
+ DecodeStatus Result = DecodeStatus::Success;
+ if ((Instruction & HexagonII::INST_PARSE_MASK) ==
+ // Determine the instruction class of each instruction in the duplex.
+ unsigned duplexIClass, IClassLow, IClassHigh;
+ duplexIClass = ((Instruction >> 28) & 0xe) | ((Instruction >> 13) & 0x1);
+ switch (duplexIClass) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ IClassLow = HexagonII::HSIG_L1;
+ IClassHigh = HexagonII::HSIG_L1;
+ break;
+ case 1:
+ IClassLow = HexagonII::HSIG_L2;
+ IClassHigh = HexagonII::HSIG_L1;
+ break;
+ case 2:
+ IClassLow = HexagonII::HSIG_L2;
+ IClassHigh = HexagonII::HSIG_L2;
+ break;
+ case 3:
+ IClassLow = HexagonII::HSIG_A;
+ IClassHigh = HexagonII::HSIG_A;
+ break;
+ case 4:
+ IClassLow = HexagonII::HSIG_L1;
+ IClassHigh = HexagonII::HSIG_A;
+ break;
+ case 5:
+ IClassLow = HexagonII::HSIG_L2;
+ IClassHigh = HexagonII::HSIG_A;
+ break;
+ case 6:
+ IClassLow = HexagonII::HSIG_S1;
+ IClassHigh = HexagonII::HSIG_A;
+ break;
+ case 7:
+ IClassLow = HexagonII::HSIG_S2;
+ IClassHigh = HexagonII::HSIG_A;
+ break;
+ case 8:
+ IClassLow = HexagonII::HSIG_S1;
+ IClassHigh = HexagonII::HSIG_L1;
+ break;
+ case 9:
+ IClassLow = HexagonII::HSIG_S1;
+ IClassHigh = HexagonII::HSIG_L2;
+ break;
+ case 10:
+ IClassLow = HexagonII::HSIG_S1;
+ IClassHigh = HexagonII::HSIG_S1;
+ break;
+ case 11:
+ IClassLow = HexagonII::HSIG_S2;
+ IClassHigh = HexagonII::HSIG_S1;
+ break;
+ case 12:
+ IClassLow = HexagonII::HSIG_S2;
+ IClassHigh = HexagonII::HSIG_L1;
+ break;
+ case 13:
+ IClassLow = HexagonII::HSIG_S2;
+ IClassHigh = HexagonII::HSIG_L2;
+ break;
+ case 14:
+ IClassLow = HexagonII::HSIG_S2;
+ IClassHigh = HexagonII::HSIG_S2;
+ break;
+ }
+ // Set the MCInst to be a duplex instruction. Which one doesn't matter.
+ MI.setOpcode(Hexagon::DuplexIClass0);
+ // Decode each instruction in the duplex.
+ // Create an MCInst for each instruction.
+ unsigned instLow = Instruction & 0x1fff;
+ unsigned instHigh = (Instruction >> 16) & 0x1fff;
+ unsigned opLow;
+ if (GetSubinstOpcode(IClassLow, instLow, opLow, os) !=
+ MCDisassembler::Success)
+ return MCDisassembler::Fail;
+ unsigned opHigh;
+ if (GetSubinstOpcode(IClassHigh, instHigh, opHigh, os) !=
+ MCDisassembler::Success)
+ return MCDisassembler::Fail;
+ MCInst *MILow = new (getContext()) MCInst;
+ MILow->setOpcode(opLow);
+ MCInst *MIHigh = new (getContext()) MCInst;
+ MIHigh->setOpcode(opHigh);
+ AddSubinstOperands(MILow, opLow, instLow);
+ AddSubinstOperands(MIHigh, opHigh, instHigh);
+ // see ConvertToSubInst() in
+ // lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+ // Add the duplex instruction MCInsts as operands to the passed in MCInst.
+ MCOperand OPLow = MCOperand::createInst(MILow);
+ MCOperand OPHigh = MCOperand::createInst(MIHigh);
+ MI.addOperand(OPLow);
+ MI.addOperand(OPHigh);
+ Complete = true;
+ } else {
+ if ((Instruction & HexagonII::INST_PARSE_MASK) ==
+ Complete = true;
+ // Calling the auto-generated decoder function.
+ Result =
+ decodeInstruction(DecoderTable32, MI, Instruction, Address, this, STI);
+ }
return Result;
+// These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td
+enum subInstBinaryValues {
+ V4_SA1_addi_BITS = 0x0000,
+ V4_SA1_addi_MASK = 0x1800,
+ V4_SA1_addrx_BITS = 0x1800,
+ V4_SA1_addrx_MASK = 0x1f00,
+ V4_SA1_addsp_BITS = 0x0c00,
+ V4_SA1_addsp_MASK = 0x1c00,
+ V4_SA1_and1_BITS = 0x1200,
+ V4_SA1_and1_MASK = 0x1f00,
+ V4_SA1_clrf_BITS = 0x1a70,
+ V4_SA1_clrf_MASK = 0x1e70,
+ V4_SA1_clrfnew_BITS = 0x1a50,
+ V4_SA1_clrfnew_MASK = 0x1e70,
+ V4_SA1_clrt_BITS = 0x1a60,
+ V4_SA1_clrt_MASK = 0x1e70,
+ V4_SA1_clrtnew_BITS = 0x1a40,
+ V4_SA1_clrtnew_MASK = 0x1e70,
+ V4_SA1_cmpeqi_BITS = 0x1900,
+ V4_SA1_cmpeqi_MASK = 0x1f00,
+ V4_SA1_combine0i_BITS = 0x1c00,
+ V4_SA1_combine0i_MASK = 0x1d18,
+ V4_SA1_combine1i_BITS = 0x1c08,
+ V4_SA1_combine1i_MASK = 0x1d18,
+ V4_SA1_combine2i_BITS = 0x1c10,
+ V4_SA1_combine2i_MASK = 0x1d18,
+ V4_SA1_combine3i_BITS = 0x1c18,
+ V4_SA1_combine3i_MASK = 0x1d18,
+ V4_SA1_combinerz_BITS = 0x1d08,
+ V4_SA1_combinerz_MASK = 0x1d08,
+ V4_SA1_combinezr_BITS = 0x1d00,
+ V4_SA1_combinezr_MASK = 0x1d08,
+ V4_SA1_dec_BITS = 0x1300,
+ V4_SA1_dec_MASK = 0x1f00,
+ V4_SA1_inc_BITS = 0x1100,
+ V4_SA1_inc_MASK = 0x1f00,
+ V4_SA1_seti_BITS = 0x0800,
+ V4_SA1_seti_MASK = 0x1c00,
+ V4_SA1_setin1_BITS = 0x1a00,
+ V4_SA1_setin1_MASK = 0x1e40,
+ V4_SA1_sxtb_BITS = 0x1500,
+ V4_SA1_sxtb_MASK = 0x1f00,
+ V4_SA1_sxth_BITS = 0x1400,
+ V4_SA1_sxth_MASK = 0x1f00,
+ V4_SA1_tfr_BITS = 0x1000,
+ V4_SA1_tfr_MASK = 0x1f00,
+ V4_SA1_zxtb_BITS = 0x1700,
+ V4_SA1_zxtb_MASK = 0x1f00,
+ V4_SA1_zxth_BITS = 0x1600,
+ V4_SA1_zxth_MASK = 0x1f00,
+ V4_SL1_loadri_io_BITS = 0x0000,
+ V4_SL1_loadri_io_MASK = 0x1000,
+ V4_SL1_loadrub_io_BITS = 0x1000,
+ V4_SL1_loadrub_io_MASK = 0x1000,
+ V4_SL2_deallocframe_BITS = 0x1f00,
+ V4_SL2_deallocframe_MASK = 0x1fc0,
+ V4_SL2_jumpr31_BITS = 0x1fc0,
+ V4_SL2_jumpr31_MASK = 0x1fc4,
+ V4_SL2_jumpr31_f_BITS = 0x1fc5,
+ V4_SL2_jumpr31_f_MASK = 0x1fc7,
+ V4_SL2_jumpr31_fnew_BITS = 0x1fc7,
+ V4_SL2_jumpr31_fnew_MASK = 0x1fc7,
+ V4_SL2_jumpr31_t_BITS = 0x1fc4,
+ V4_SL2_jumpr31_t_MASK = 0x1fc7,
+ V4_SL2_jumpr31_tnew_BITS = 0x1fc6,
+ V4_SL2_jumpr31_tnew_MASK = 0x1fc7,
+ V4_SL2_loadrb_io_BITS = 0x1000,
+ V4_SL2_loadrb_io_MASK = 0x1800,
+ V4_SL2_loadrd_sp_BITS = 0x1e00,
+ V4_SL2_loadrd_sp_MASK = 0x1f00,
+ V4_SL2_loadrh_io_BITS = 0x0000,
+ V4_SL2_loadrh_io_MASK = 0x1800,
+ V4_SL2_loadri_sp_BITS = 0x1c00,
+ V4_SL2_loadri_sp_MASK = 0x1e00,
+ V4_SL2_loadruh_io_BITS = 0x0800,
+ V4_SL2_loadruh_io_MASK = 0x1800,
+ V4_SL2_return_BITS = 0x1f40,
+ V4_SL2_return_MASK = 0x1fc4,
+ V4_SL2_return_f_BITS = 0x1f45,
+ V4_SL2_return_f_MASK = 0x1fc7,
+ V4_SL2_return_fnew_BITS = 0x1f47,
+ V4_SL2_return_fnew_MASK = 0x1fc7,
+ V4_SL2_return_t_BITS = 0x1f44,
+ V4_SL2_return_t_MASK = 0x1fc7,
+ V4_SL2_return_tnew_BITS = 0x1f46,
+ V4_SL2_return_tnew_MASK = 0x1fc7,
+ V4_SS1_storeb_io_BITS = 0x1000,
+ V4_SS1_storeb_io_MASK = 0x1000,
+ V4_SS1_storew_io_BITS = 0x0000,
+ V4_SS1_storew_io_MASK = 0x1000,
+ V4_SS2_allocframe_BITS = 0x1c00,
+ V4_SS2_allocframe_MASK = 0x1e00,
+ V4_SS2_storebi0_BITS = 0x1200,
+ V4_SS2_storebi0_MASK = 0x1f00,
+ V4_SS2_storebi1_BITS = 0x1300,
+ V4_SS2_storebi1_MASK = 0x1f00,
+ V4_SS2_stored_sp_BITS = 0x0a00,
+ V4_SS2_stored_sp_MASK = 0x1e00,
+ V4_SS2_storeh_io_BITS = 0x0000,
+ V4_SS2_storeh_io_MASK = 0x1800,
+ V4_SS2_storew_sp_BITS = 0x0800,
+ V4_SS2_storew_sp_MASK = 0x1e00,
+ V4_SS2_storewi0_BITS = 0x1000,
+ V4_SS2_storewi0_MASK = 0x1f00,
+ V4_SS2_storewi1_BITS = 0x1100,
+ V4_SS2_storewi1_MASK = 0x1f00
+static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op,
+ raw_ostream &os) {
+ switch (IClass) {
+ case HexagonII::HSIG_L1:
+ if ((inst & V4_SL1_loadri_io_MASK) == V4_SL1_loadri_io_BITS)
+ op = Hexagon::V4_SL1_loadri_io;
+ else if ((inst & V4_SL1_loadrub_io_MASK) == V4_SL1_loadrub_io_BITS)
+ op = Hexagon::V4_SL1_loadrub_io;
+ else {
+ os << "<unknown subinstruction>";
+ return MCDisassembler::Fail;
+ }
+ break;
+ case HexagonII::HSIG_L2:
+ if ((inst & V4_SL2_deallocframe_MASK) == V4_SL2_deallocframe_BITS)
+ op = Hexagon::V4_SL2_deallocframe;
+ else if ((inst & V4_SL2_jumpr31_MASK) == V4_SL2_jumpr31_BITS)
+ op = Hexagon::V4_SL2_jumpr31;
+ else if ((inst & V4_SL2_jumpr31_f_MASK) == V4_SL2_jumpr31_f_BITS)
+ op = Hexagon::V4_SL2_jumpr31_f;
+ else if ((inst & V4_SL2_jumpr31_fnew_MASK) == V4_SL2_jumpr31_fnew_BITS)
+ op = Hexagon::V4_SL2_jumpr31_fnew;
+ else if ((inst & V4_SL2_jumpr31_t_MASK) == V4_SL2_jumpr31_t_BITS)
+ op = Hexagon::V4_SL2_jumpr31_t;
+ else if ((inst & V4_SL2_jumpr31_tnew_MASK) == V4_SL2_jumpr31_tnew_BITS)
+ op = Hexagon::V4_SL2_jumpr31_tnew;
+ else if ((inst & V4_SL2_loadrb_io_MASK) == V4_SL2_loadrb_io_BITS)
+ op = Hexagon::V4_SL2_loadrb_io;
+ else if ((inst & V4_SL2_loadrd_sp_MASK) == V4_SL2_loadrd_sp_BITS)
+ op = Hexagon::V4_SL2_loadrd_sp;
+ else if ((inst & V4_SL2_loadrh_io_MASK) == V4_SL2_loadrh_io_BITS)
+ op = Hexagon::V4_SL2_loadrh_io;
+ else if ((inst & V4_SL2_loadri_sp_MASK) == V4_SL2_loadri_sp_BITS)
+ op = Hexagon::V4_SL2_loadri_sp;
+ else if ((inst & V4_SL2_loadruh_io_MASK) == V4_SL2_loadruh_io_BITS)
+ op = Hexagon::V4_SL2_loadruh_io;
+ else if ((inst & V4_SL2_return_MASK) == V4_SL2_return_BITS)
+ op = Hexagon::V4_SL2_return;
+ else if ((inst & V4_SL2_return_f_MASK) == V4_SL2_return_f_BITS)
+ op = Hexagon::V4_SL2_return_f;
+ else if ((inst & V4_SL2_return_fnew_MASK) == V4_SL2_return_fnew_BITS)
+ op = Hexagon::V4_SL2_return_fnew;
+ else if ((inst & V4_SL2_return_t_MASK) == V4_SL2_return_t_BITS)
+ op = Hexagon::V4_SL2_return_t;
+ else if ((inst & V4_SL2_return_tnew_MASK) == V4_SL2_return_tnew_BITS)
+ op = Hexagon::V4_SL2_return_tnew;
+ else {
+ os << "<unknown subinstruction>";
+ return MCDisassembler::Fail;
+ }
+ break;
+ case HexagonII::HSIG_A:
+ if ((inst & V4_SA1_addi_MASK) == V4_SA1_addi_BITS)
+ op = Hexagon::V4_SA1_addi;
+ else if ((inst & V4_SA1_addrx_MASK) == V4_SA1_addrx_BITS)
+ op = Hexagon::V4_SA1_addrx;
+ else if ((inst & V4_SA1_addsp_MASK) == V4_SA1_addsp_BITS)
+ op = Hexagon::V4_SA1_addsp;
+ else if ((inst & V4_SA1_and1_MASK) == V4_SA1_and1_BITS)
+ op = Hexagon::V4_SA1_and1;
+ else if ((inst & V4_SA1_clrf_MASK) == V4_SA1_clrf_BITS)
+ op = Hexagon::V4_SA1_clrf;
+ else if ((inst & V4_SA1_clrfnew_MASK) == V4_SA1_clrfnew_BITS)
+ op = Hexagon::V4_SA1_clrfnew;
+ else if ((inst & V4_SA1_clrt_MASK) == V4_SA1_clrt_BITS)
+ op = Hexagon::V4_SA1_clrt;
+ else if ((inst & V4_SA1_clrtnew_MASK) == V4_SA1_clrtnew_BITS)
+ op = Hexagon::V4_SA1_clrtnew;
+ else if ((inst & V4_SA1_cmpeqi_MASK) == V4_SA1_cmpeqi_BITS)
+ op = Hexagon::V4_SA1_cmpeqi;
+ else if ((inst & V4_SA1_combine0i_MASK) == V4_SA1_combine0i_BITS)
+ op = Hexagon::V4_SA1_combine0i;
+ else if ((inst & V4_SA1_combine1i_MASK) == V4_SA1_combine1i_BITS)
+ op = Hexagon::V4_SA1_combine1i;
+ else if ((inst & V4_SA1_combine2i_MASK) == V4_SA1_combine2i_BITS)
+ op = Hexagon::V4_SA1_combine2i;
+ else if ((inst & V4_SA1_combine3i_MASK) == V4_SA1_combine3i_BITS)
+ op = Hexagon::V4_SA1_combine3i;
+ else if ((inst & V4_SA1_combinerz_MASK) == V4_SA1_combinerz_BITS)
+ op = Hexagon::V4_SA1_combinerz;
+ else if ((inst & V4_SA1_combinezr_MASK) == V4_SA1_combinezr_BITS)
+ op = Hexagon::V4_SA1_combinezr;
+ else if ((inst & V4_SA1_dec_MASK) == V4_SA1_dec_BITS)
+ op = Hexagon::V4_SA1_dec;
+ else if ((inst & V4_SA1_inc_MASK) == V4_SA1_inc_BITS)
+ op = Hexagon::V4_SA1_inc;
+ else if ((inst & V4_SA1_seti_MASK) == V4_SA1_seti_BITS)
+ op = Hexagon::V4_SA1_seti;
+ else if ((inst & V4_SA1_setin1_MASK) == V4_SA1_setin1_BITS)
+ op = Hexagon::V4_SA1_setin1;
+ else if ((inst & V4_SA1_sxtb_MASK) == V4_SA1_sxtb_BITS)
+ op = Hexagon::V4_SA1_sxtb;
+ else if ((inst & V4_SA1_sxth_MASK) == V4_SA1_sxth_BITS)
+ op = Hexagon::V4_SA1_sxth;
+ else if ((inst & V4_SA1_tfr_MASK) == V4_SA1_tfr_BITS)
+ op = Hexagon::V4_SA1_tfr;
+ else if ((inst & V4_SA1_zxtb_MASK) == V4_SA1_zxtb_BITS)
+ op = Hexagon::V4_SA1_zxtb;
+ else if ((inst & V4_SA1_zxth_MASK) == V4_SA1_zxth_BITS)
+ op = Hexagon::V4_SA1_zxth;
+ else {
+ os << "<unknown subinstruction>";
+ return MCDisassembler::Fail;
+ }
+ break;
+ case HexagonII::HSIG_S1:
+ if ((inst & V4_SS1_storeb_io_MASK) == V4_SS1_storeb_io_BITS)
+ op = Hexagon::V4_SS1_storeb_io;
+ else if ((inst & V4_SS1_storew_io_MASK) == V4_SS1_storew_io_BITS)
+ op = Hexagon::V4_SS1_storew_io;
+ else {
+ os << "<unknown subinstruction>";
+ return MCDisassembler::Fail;
+ }
+ break;
+ case HexagonII::HSIG_S2:
+ if ((inst & V4_SS2_allocframe_MASK) == V4_SS2_allocframe_BITS)
+ op = Hexagon::V4_SS2_allocframe;
+ else if ((inst & V4_SS2_storebi0_MASK) == V4_SS2_storebi0_BITS)
+ op = Hexagon::V4_SS2_storebi0;
+ else if ((inst & V4_SS2_storebi1_MASK) == V4_SS2_storebi1_BITS)
+ op = Hexagon::V4_SS2_storebi1;
+ else if ((inst & V4_SS2_stored_sp_MASK) == V4_SS2_stored_sp_BITS)
+ op = Hexagon::V4_SS2_stored_sp;
+ else if ((inst & V4_SS2_storeh_io_MASK) == V4_SS2_storeh_io_BITS)
+ op = Hexagon::V4_SS2_storeh_io;
+ else if ((inst & V4_SS2_storew_sp_MASK) == V4_SS2_storew_sp_BITS)
+ op = Hexagon::V4_SS2_storew_sp;
+ else if ((inst & V4_SS2_storewi0_MASK) == V4_SS2_storewi0_BITS)
+ op = Hexagon::V4_SS2_storewi0;
+ else if ((inst & V4_SS2_storewi1_MASK) == V4_SS2_storewi1_BITS)
+ op = Hexagon::V4_SS2_storewi1;
+ else {
+ os << "<unknown subinstruction>";
+ return MCDisassembler::Fail;
+ }
+ break;
+ default:
+ os << "<unknown>";
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+static unsigned getRegFromSubinstEncoding(unsigned encoded_reg) {
+ if (encoded_reg < 8)
+ return Hexagon::R0 + encoded_reg;
+ else if (encoded_reg < 16)
+ return Hexagon::R0 + encoded_reg + 8;
+ return Hexagon::NoRegister;
+static unsigned getDRegFromSubinstEncoding(unsigned encoded_dreg) {
+ if (encoded_dreg < 4)
+ return Hexagon::D0 + encoded_dreg;
+ else if (encoded_dreg < 8)
+ return Hexagon::D0 + encoded_dreg + 4;
+ return Hexagon::NoRegister;
+static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
+ int64_t operand;
+ MCOperand Op;
+ switch (opcode) {
+ case Hexagon::V4_SL2_deallocframe:
+ case Hexagon::V4_SL2_jumpr31:
+ case Hexagon::V4_SL2_jumpr31_f:
+ case Hexagon::V4_SL2_jumpr31_fnew:
+ case Hexagon::V4_SL2_jumpr31_t:
+ case Hexagon::V4_SL2_jumpr31_tnew:
+ case Hexagon::V4_SL2_return:
+ case Hexagon::V4_SL2_return_f:
+ case Hexagon::V4_SL2_return_fnew:
+ case Hexagon::V4_SL2_return_t:
+ case Hexagon::V4_SL2_return_tnew:
+ // no operands for these instructions
+ break;
+ case Hexagon::V4_SS2_allocframe:
+ // u 8-4{5_3}
+ operand = ((inst & 0x1f0) >> 4) << 3;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SL1_loadri_io:
+ // Rd 3-0, Rs 7-4, u 11-8{4_2}
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = (inst & 0xf00) >> 6;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SL1_loadrub_io:
+ // Rd 3-0, Rs 7-4, u 11-8
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = (inst & 0xf00) >> 8;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SL2_loadrb_io:
+ // Rd 3-0, Rs 7-4, u 10-8
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = (inst & 0x700) >> 8;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SL2_loadrh_io:
+ case Hexagon::V4_SL2_loadruh_io:
+ // Rd 3-0, Rs 7-4, u 10-8{3_1}
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = ((inst & 0x700) >> 8) << 1;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SL2_loadrd_sp:
+ // Rdd 2-0, u 7-3{5_3}
+ operand = getDRegFromSubinstEncoding(inst & 0x7);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = ((inst & 0x0f8) >> 3) << 3;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SL2_loadri_sp:
+ // Rd 3-0, u 8-4{5_2}
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = ((inst & 0x1f0) >> 4) << 2;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SA1_addi:
+ // Rx 3-0 (x2), s7 10-4
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ MI->addOperand(Op);
+ operand = SignExtend64<7>((inst & 0x7f0) >> 4);
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SA1_addrx:
+ // Rx 3-0 (x2), Rs 7-4
+ // Rx is added twice: once as the destination and once as the tied
+ // source operand, followed by the second source register Rs.
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ // Terminate the arm explicitly; without this break control falls into
+ // the two-register (Rd, Rs) group below and appends two extra operands.
+ break;
+ case Hexagon::V4_SA1_and1:
+ case Hexagon::V4_SA1_dec:
+ case Hexagon::V4_SA1_inc:
+ case Hexagon::V4_SA1_sxtb:
+ case Hexagon::V4_SA1_sxth:
+ case Hexagon::V4_SA1_tfr:
+ case Hexagon::V4_SA1_zxtb:
+ case Hexagon::V4_SA1_zxth:
+ // Rd 3-0, Rs 7-4
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SA1_addsp:
+ // Rd 3-0, u 9-4{6_2}
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = ((inst & 0x3f0) >> 4) << 2;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SA1_seti:
+ // Rd 3-0, u 9-4
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = (inst & 0x3f0) >> 4;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SA1_clrf:
+ case Hexagon::V4_SA1_clrfnew:
+ case Hexagon::V4_SA1_clrt:
+ case Hexagon::V4_SA1_clrtnew:
+ case Hexagon::V4_SA1_setin1:
+ // Rd 3-0
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SA1_cmpeqi:
+ // Rs 7-4, u 1-0
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = inst & 0x3;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SA1_combine0i:
+ case Hexagon::V4_SA1_combine1i:
+ case Hexagon::V4_SA1_combine2i:
+ case Hexagon::V4_SA1_combine3i:
+ // Rdd 2-0, u 6-5
+ operand = getDRegFromSubinstEncoding(inst & 0x7);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = (inst & 0x060) >> 5;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SA1_combinerz:
+ case Hexagon::V4_SA1_combinezr:
+ // Rdd 2-0, Rs 7-4
+ operand = getDRegFromSubinstEncoding(inst & 0x7);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SS1_storeb_io:
+ // Rs 7-4, u 11-8, Rt 3-0
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = (inst & 0xf00) >> 8;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SS1_storew_io:
+ // Rs 7-4, u 11-8{4_2}, Rt 3-0
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = ((inst & 0xf00) >> 8) << 2;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SS2_storebi0:
+ case Hexagon::V4_SS2_storebi1:
+ // Rs 7-4, u 3-0
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = inst & 0xf;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SS2_storewi0:
+ case Hexagon::V4_SS2_storewi1:
+ // Rs 7-4, u 3-0{4_2}
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = (inst & 0xf) << 2;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SS2_stored_sp:
+ // s 8-3{6_3}, Rtt 2-0
+ // The 6-bit field is scaled by 8 and sign-extended from 9 bits, then
+ // the double-register source is decoded from the low three bits.
+ operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3);
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ operand = getDRegFromSubinstEncoding(inst & 0x7);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ // Terminate the arm explicitly; without this break control falls into
+ // the storeh_io arm below and appends three extra operands.
+ break;
+ case Hexagon::V4_SS2_storeh_io:
+ // Rs 7-4, u 10-8{3_1}, Rt 3-0
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = ((inst & 0x700) >> 8) << 1;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SS2_storew_sp:
+ // u 8-4{5_2}, Rd 3-0
+ operand = ((inst & 0x1f0) >> 4) << 2;
+ Op = MCOperand::createImm(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ default:
+ // don't crash with an invalid subinstruction
+ // llvm_unreachable("Invalid subinstruction in duplex instruction");
+ break;
+ }
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index dfe79f9..6e2ecaf 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -76,4 +76,11 @@ namespace llvm {
// Maximum number of words and instructions in a packet.
+// Minimum number of instructions in an end-loop packet.
+// Maximum number of instructions in a packet before shuffling,
+// including a compound one or a duplex or an extender.
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index e9491ba..05728d2 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -20,6 +20,7 @@
#include "HexagonTargetMachine.h"
#include "MCTargetDesc/HexagonInstPrinter.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -78,14 +79,14 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
O << MO.getImm();
case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
+ MO.getMBB()->getSymbol()->print(O, MAI);
case MachineOperand::MO_ConstantPoolIndex:
- O << *GetCPISymbol(MO.getIndex());
+ GetCPISymbol(MO.getIndex())->print(O, MAI);
case MachineOperand::MO_GlobalAddress:
// Computing the address of a global symbol, not calling it.
- O << *getSymbol(MO.getGlobal());
+ getSymbol(MO.getGlobal())->print(O, MAI);
printOffset(MO.getOffset(), O);
@@ -177,49 +178,40 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
/// the current output stream.
void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- if (MI->isBundle()) {
- std::vector<MachineInstr const *> BundleMIs;
+ MCInst MCB;
+ MCB.setOpcode(Hexagon::BUNDLE);
+ MCB.addOperand(MCOperand::createImm(0));
- const MachineBasicBlock *MBB = MI->getParent();
+ if (MI->isBundle()) {
+ const MachineBasicBlock* MBB = MI->getParent();
MachineBasicBlock::const_instr_iterator MII = MI;
- ++MII;
- unsigned int IgnoreCount = 0;
- while (MII != MBB->end() && MII->isInsideBundle()) {
- const MachineInstr *MInst = MII;
- if (MInst->getOpcode() == TargetOpcode::DBG_VALUE ||
- MInst->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
- IgnoreCount++;
- ++MII;
- continue;
+ unsigned IgnoreCount = 0;
+ for (++MII; MII != MBB->end() && MII->isInsideBundle(); ++MII) {
+ if (MII->getOpcode() == TargetOpcode::DBG_VALUE ||
+ MII->getOpcode() == TargetOpcode::IMPLICIT_DEF)
+ ++IgnoreCount;
+ else {
+ HexagonLowerToMC(MII, MCB, *this);
- // BundleMIs.push_back(&*MII);
- BundleMIs.push_back(MInst);
- ++MII;
- }
- unsigned Size = BundleMIs.size();
- assert((Size + IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!");
- for (unsigned Index = 0; Index < Size; Index++) {
- MCInst MCI;
- HexagonLowerToMC(BundleMIs[Index], MCI, *this);
- HexagonMCInstrInfo::AppendImplicitOperands(MCI);
- HexagonMCInstrInfo::setPacketBegin(MCI, Index == 0);
- HexagonMCInstrInfo::setPacketEnd(MCI, Index == (Size - 1));
- EmitToStreamer(*OutStreamer, MCI);
else {
- MCInst MCI;
- HexagonLowerToMC(MI, MCI, *this);
- HexagonMCInstrInfo::AppendImplicitOperands(MCI);
- if (MI->getOpcode() == Hexagon::ENDLOOP0) {
- HexagonMCInstrInfo::setPacketBegin(MCI, true);
- HexagonMCInstrInfo::setPacketEnd(MCI, true);
- }
- EmitToStreamer(*OutStreamer, MCI);
+ HexagonLowerToMC(MI, MCB, *this);
+ HexagonMCInstrInfo::padEndloop(MCB);
- return;
+ // Examine the packet and try to find instructions that can be converted
+ // to compounds.
+ HexagonMCInstrInfo::tryCompound(*Subtarget->getInstrInfo(),
+ OutStreamer->getContext(), MCB);
+ // Examine the packet and convert pairs of instructions to duplex
+ // instructions when possible.
+ SmallVector<DuplexCandidate, 8> possibleDuplexes;
+ possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(
+ *Subtarget->getInstrInfo(), MCB);
+ HexagonMCShuffle(*Subtarget->getInstrInfo(), *Subtarget,
+ OutStreamer->getContext(), MCB, possibleDuplexes);
+ EmitToStreamer(*OutStreamer, MCB);
extern "C" void LLVMInitializeHexagonAsmPrinter() {
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 0885a79..868f87e 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -201,17 +201,17 @@ namespace {
// Check individual operands.
- for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) {
+ for (const MachineOperand &MO : MI->operands()) {
// While the presence of a frame index does not prove that a stack
// frame will be required, all frame indexes should be within alloc-
// frame/deallocframe. Otherwise, the code that translates a frame
// index into an offset would have to be aware of the placement of
// the frame creation/destruction instructions.
- if (Mo->isFI())
+ if (MO.isFI())
return true;
- if (!Mo->isReg())
+ if (!MO.isReg())
- unsigned R = Mo->getReg();
+ unsigned R = MO.getReg();
// Virtual registers will need scavenging, which then may require
// a stack slot.
if (TargetRegisterInfo::isVirtualRegister(R))
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index ed5676c..74d92ae 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2370,7 +2370,8 @@ bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
/// isLegalAddressingMode - Return true if the addressing mode represented by
/// AM is legal for this target, for a load/store of the specified type.
bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
// Allows a signed-extended 11-bit immediate field.
if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1)
return false;
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 584c2c5..b80e847 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -198,7 +198,8 @@ bool isPositiveHalfWord(SDNode *N);
/// The type may be VoidTy, in which case only return true if the addressing
/// mode is legal for a load/store of any legal type.
/// TODO: Handle pre/postinc as well.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index 36a7e9f..44bab29 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -66,10 +66,8 @@ def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd)
class OpcodeHexagon {
field bits<32> Inst = ?; // Default to an invalid insn.
bits<4> IClass = 0; // ICLASS
- bits<2> IParse = 0; // Parse bits.
let Inst{31-28} = IClass;
- let Inst{15-14} = IParse;
bits<1> zero = 0;
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index 7f7b2c9..db83ef6 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -146,6 +146,11 @@ class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
: InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123,
TypePREFIX>, OpcodeHexagon;
+class SUBInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypeDUPLEX>,
+ OpcodeHexagon;
class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
: InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>,
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 49b4517..e566a97 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -779,10 +779,9 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
return false;
-MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- int FI) const {
+MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FI) const {
// Hexagon_TODO: Implement.
return nullptr;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 0239cab..a7ae65e 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -114,10 +114,12 @@ public:
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
int FrameIndex) const override;
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
MachineInstr *LoadMI) const override {
return nullptr;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 8b667c6..65b0f49 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -4263,3 +4263,7 @@ def J4_jumpsetr: CJInst <
let Inst{19-16} = Rs;
let Inst{7-1} = r9_2{8-2};
+// Duplex instructions
+include "HexagonIsetDx.td"
diff --git a/lib/Target/Hexagon/HexagonIsetDx.td b/lib/Target/Hexagon/HexagonIsetDx.td
new file mode 100644
index 0000000..0ca95e9
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIsetDx.td
@@ -0,0 +1,728 @@
+//=- HexagonIsetDx.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file describes the Hexagon duplex instructions.
+// SA1_combine1i: Combines.
+let isCodeGenOnly = 1, hasSideEffects = 0 in
+def V4_SA1_combine1i: SUBInst <
+ (outs DoubleRegs:$Rdd),
+ (ins u2Imm:$u2),
+ "$Rdd = combine(#1, #$u2)"> {
+ bits<3> Rdd;
+ bits<2> u2;
+ let Inst{12-10} = 0b111;
+ let Inst{8} = 0b0;
+ let Inst{4-3} = 0b01;
+ let Inst{2-0} = Rdd;
+ let Inst{6-5} = u2;
+ }
+// SL2_jumpr31_f: Indirect conditional jump if false.
+// SL2_jumpr31_f -> SL2_jumpr31_fnew
+let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
+def V4_SL2_jumpr31_f: SUBInst <
+ (outs ),
+ (ins ),
+ "if (!p0) jumpr r31"> {
+ let Inst{12-6} = 0b1111111;
+ let Inst{2-0} = 0b101;
+ }
+// SL2_deallocframe: Deallocate stack frame.
+let Defs = [R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in
+def V4_SL2_deallocframe: SUBInst <
+ (outs ),
+ (ins ),
+ "deallocframe"> {
+ let Inst{12-6} = 0b1111100;
+ let Inst{2} = 0b0;
+ }
+// SL2_return_f: Deallocate stack frame and return.
+// SL2_return_f -> SL2_return_fnew
+let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
+def V4_SL2_return_f: SUBInst <
+ (outs ),
+ (ins ),
+ "if (!p0) dealloc_return"> {
+ let Inst{12-6} = 0b1111101;
+ let Inst{2-0} = 0b101;
+ }
+// SA1_combine3i: Combines.
+let isCodeGenOnly = 1, hasSideEffects = 0 in
+def V4_SA1_combine3i: SUBInst <
+ (outs DoubleRegs:$Rdd),
+ (ins u2Imm:$u2),
+ "$Rdd = combine(#3, #$u2)"> {
+ bits<3> Rdd;
+ bits<2> u2;
+ let Inst{12-10} = 0b111;
+ let Inst{8} = 0b0;
+ let Inst{4-3} = 0b11;
+ let Inst{2-0} = Rdd;
+ let Inst{6-5} = u2;
+ }
+// SS2_storebi0: Store byte.
+let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
+def V4_SS2_storebi0: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u4_0Imm:$u4_0),
+ "memb($Rs + #$u4_0)=#0"> {
+ bits<4> Rs;
+ bits<4> u4_0;
+ let Inst{12-8} = 0b10010;
+ let Inst{7-4} = Rs;
+ let Inst{3-0} = u4_0;
+ }
+// SA1_clrtnew: Clear if true (new-value predicate).
+// Writes #0 to Rd when the predicate holds; marked isPredicatedNew, so the
+// assembly string tests p0.new rather than p0.
+let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_clrtnew: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins ),
+ "if (p0.new) $Rd = #0"> {
+ bits<4> Rd;
+ let Inst{12-9} = 0b1101;
+ let Inst{6-4} = 0b100;
+ let Inst{3-0} = Rd;
+ }
+// SL2_loadruh_io: Load half.
+let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in
+def V4_SL2_loadruh_io: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u3_1Imm:$u3_1),
+ "$Rd = memuh($Rs + #$u3_1)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ bits<4> u3_1;
+ let Inst{12-11} = 0b01;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ let Inst{10-8} = u3_1{3-1};
+ }
+// SL2_jumpr31_tnew: Indirect conditional jump if true (new-value predicate).
+// Takes no explicit operands; reads P0 and R31 and defines PC. Marked
+// isPredicatedNew, so the assembly string tests p0.new rather than p0.
+let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
+def V4_SL2_jumpr31_tnew: SUBInst <
+ (outs ),
+ (ins ),
+ "if (p0.new) jumpr:nt r31"> {
+ let Inst{12-6} = 0b1111111;
+ let Inst{2-0} = 0b110;
+ }
+// SA1_addi: Add.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 1, opExtentBits = 7, opExtendable = 2 in
+def V4_SA1_addi: SUBInst <
+ (outs IntRegs:$Rx),
+ (ins IntRegs:$_src_, s7Ext:$s7),
+ "$Rx = add($_src_, #$s7)" ,
+ [] ,
+ "$_src_ = $Rx"> {
+ bits<4> Rx;
+ bits<7> s7;
+ let Inst{12-11} = 0b00;
+ let Inst{3-0} = Rx;
+ let Inst{10-4} = s7;
+ }
+// SL1_loadrub_io: Load byte.
+let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in
+def V4_SL1_loadrub_io: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u4_0Imm:$u4_0),
+ "$Rd = memub($Rs + #$u4_0)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ bits<4> u4_0;
+ let Inst{12} = 0b1;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ let Inst{11-8} = u4_0;
+ }
+// SL1_loadri_io: Load word.
+let isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in
+def V4_SL1_loadri_io: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u4_2Imm:$u4_2),
+ "$Rd = memw($Rs + #$u4_2)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ bits<6> u4_2;
+ let Inst{12} = 0b0;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ let Inst{11-8} = u4_2{5-2};
+ }
+// SA1_cmpeqi: Compareimmed.
+let Defs = [P0], isCodeGenOnly = 1, hasSideEffects = 0 in
+def V4_SA1_cmpeqi: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u2Imm:$u2),
+ "p0 = cmp.eq($Rs, #$u2)"> {
+ bits<4> Rs;
+ bits<2> u2;
+ let Inst{12-8} = 0b11001;
+ let Inst{7-4} = Rs;
+ let Inst{1-0} = u2;
+ }
+// SA1_combinerz: Combines.
+let isCodeGenOnly = 1, hasSideEffects = 0 in
+def V4_SA1_combinerz: SUBInst <
+ (outs DoubleRegs:$Rdd),
+ (ins IntRegs:$Rs),
+ "$Rdd = combine($Rs, #0)"> {
+ bits<3> Rdd;
+ bits<4> Rs;
+ let Inst{12-10} = 0b111;
+ let Inst{8} = 0b1;
+ let Inst{3} = 0b1;
+ let Inst{2-0} = Rdd;
+ let Inst{7-4} = Rs;
+ }
+// SL2_return_t: Deallocate stack frame and return.
+// SL2_return_t -> SL2_return_tnew
+let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
+def V4_SL2_return_t: SUBInst <
+ (outs ),
+ (ins ),
+ "if (p0) dealloc_return"> {
+ let Inst{12-6} = 0b1111101;
+ let Inst{2-0} = 0b100;
+ }
+// SS2_allocframe: Allocate stack frame.
+let Defs = [R29, R30], Uses = [R30, R31, R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in
+def V4_SS2_allocframe: SUBInst <
+ (outs ),
+ (ins u5_3Imm:$u5_3),
+ "allocframe(#$u5_3)"> {
+ bits<8> u5_3;
+ let Inst{12-9} = 0b1110;
+ let Inst{8-4} = u5_3{7-3};
+ }
+// SS2_storeh_io: Store half.
+let isCodeGenOnly = 1, mayStore = 1, accessSize = HalfWordAccess in
+def V4_SS2_storeh_io: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u3_1Imm:$u3_1, IntRegs:$Rt),
+ "memh($Rs + #$u3_1) = $Rt"> {
+ bits<4> Rs;
+ bits<4> u3_1;
+ bits<4> Rt;
+ let Inst{12-11} = 0b00;
+ let Inst{7-4} = Rs;
+ let Inst{10-8} = u3_1{3-1};
+ let Inst{3-0} = Rt;
+ }
+// SS2_storewi0: Store word.
+let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
+def V4_SS2_storewi0: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u4_2Imm:$u4_2),
+ "memw($Rs + #$u4_2)=#0"> {
+ bits<4> Rs;
+ bits<6> u4_2;
+ let Inst{12-8} = 0b10000;
+ let Inst{7-4} = Rs;
+ let Inst{3-0} = u4_2{5-2};
+ }
+// SS2_storewi1: Store word.
+let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
+def V4_SS2_storewi1: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u4_2Imm:$u4_2),
+ "memw($Rs + #$u4_2)=#1"> {
+ bits<4> Rs;
+ bits<6> u4_2;
+ let Inst{12-8} = 0b10001;
+ let Inst{7-4} = Rs;
+ let Inst{3-0} = u4_2{5-2};
+ }
+// SL2_jumpr31: Indirect jump through r31 (unconditional).
+let Defs = [PC], Uses = [R31], isCodeGenOnly = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
+def V4_SL2_jumpr31: SUBInst <
+ (outs ),
+ (ins ),
+ "jumpr r31"> {
+ let Inst{12-6} = 0b1111111;
+ let Inst{2} = 0b0;
+ }
+// SA1_combinezr: Combines.
+let isCodeGenOnly = 1, hasSideEffects = 0 in
+def V4_SA1_combinezr: SUBInst <
+ (outs DoubleRegs:$Rdd),
+ (ins IntRegs:$Rs),
+ "$Rdd = combine(#0, $Rs)"> {
+ bits<3> Rdd;
+ bits<4> Rs;
+ let Inst{12-10} = 0b111;
+ let Inst{8} = 0b1;
+ let Inst{3} = 0b0;
+ let Inst{2-0} = Rdd;
+ let Inst{7-4} = Rs;
+ }
+// SL2_loadrh_io: Load half.
+let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in
+def V4_SL2_loadrh_io: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u3_1Imm:$u3_1),
+ "$Rd = memh($Rs + #$u3_1)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ bits<4> u3_1;
+ let Inst{12-11} = 0b00;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ let Inst{10-8} = u3_1{3-1};
+ }
+// SA1_addrx: Add.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_addrx: SUBInst <
+ (outs IntRegs:$Rx),
+ (ins IntRegs:$_src_, IntRegs:$Rs),
+ "$Rx = add($_src_, $Rs)" ,
+ [] ,
+ "$_src_ = $Rx"> {
+ bits<4> Rx;
+ bits<4> Rs;
+ let Inst{12-8} = 0b11000;
+ let Inst{3-0} = Rx;
+ let Inst{7-4} = Rs;
+ }
+// SA1_setin1: Set to -1.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_setin1: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins ),
+ "$Rd = #-1"> {
+ bits<4> Rd;
+ let Inst{12-9} = 0b1101;
+ let Inst{6} = 0b0;
+ let Inst{3-0} = Rd;
+ }
+// SA1_sxth: Sxth.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_sxth: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = sxth($Rs)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ let Inst{12-8} = 0b10100;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ }
+// SA1_combine0i: Combines.
+let isCodeGenOnly = 1, hasSideEffects = 0 in
+def V4_SA1_combine0i: SUBInst <
+ (outs DoubleRegs:$Rdd),
+ (ins u2Imm:$u2),
+ "$Rdd = combine(#0, #$u2)"> {
+ bits<3> Rdd;
+ bits<2> u2;
+ let Inst{12-10} = 0b111;
+ let Inst{8} = 0b0;
+ let Inst{4-3} = 0b00;
+ let Inst{2-0} = Rdd;
+ let Inst{6-5} = u2;
+ }
+// SA1_combine2i: Combines.
+let isCodeGenOnly = 1, hasSideEffects = 0 in
+def V4_SA1_combine2i: SUBInst <
+ (outs DoubleRegs:$Rdd),
+ (ins u2Imm:$u2),
+ "$Rdd = combine(#2, #$u2)"> {
+ bits<3> Rdd;
+ bits<2> u2;
+ let Inst{12-10} = 0b111;
+ let Inst{8} = 0b0;
+ let Inst{4-3} = 0b10;
+ let Inst{2-0} = Rdd;
+ let Inst{6-5} = u2;
+ }
+// SA1_sxtb: Sxtb.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_sxtb: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = sxtb($Rs)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ let Inst{12-8} = 0b10101;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ }
+// SA1_clrf: Clear if false.
+// SA1_clrf -> SA1_clrfnew
+let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_clrf: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins ),
+ "if (!p0) $Rd = #0"> {
+ bits<4> Rd;
+ let Inst{12-9} = 0b1101;
+ let Inst{6-4} = 0b111;
+ let Inst{3-0} = Rd;
+ }
+// SL2_loadrb_io: Load byte.
+let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in
+def V4_SL2_loadrb_io: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u3_0Imm:$u3_0),
+ "$Rd = memb($Rs + #$u3_0)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ bits<3> u3_0;
+ let Inst{12-11} = 0b10;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ let Inst{10-8} = u3_0;
+ }
+// SA1_tfr: Tfr.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_tfr: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = $Rs"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ let Inst{12-8} = 0b10000;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ }
+// SL2_loadrd_sp: Load dword. Printed as "$Rdd = memd(r29 + #$u5_3)"; reads R29 (Uses).
+let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in
+def V4_SL2_loadrd_sp: SUBInst <
+ (outs DoubleRegs:$Rdd),
+ (ins u5_3Imm:$u5_3),
+ "$Rdd = memd(r29 + #$u5_3)"> {
+ bits<3> Rdd;
+ bits<8> u5_3;
+ let Inst{12-8} = 0b11110; // fixed encoding bits
+ let Inst{2-0} = Rdd; // output register-pair field
+ let Inst{7-3} = u5_3{7-3}; // only bits 7-3 of the offset are encoded; bits 2-0 are dropped
+ }
+// SA1_and1: And #1. Sub-instruction printed as "$Rd = and($Rs, #1)".
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_and1: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = and($Rs, #1)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ let Inst{12-8} = 0b10010; // fixed encoding bits
+ let Inst{3-0} = Rd; // output register field
+ let Inst{7-4} = Rs; // input register field
+ }
+// SS2_storebi1: Store byte. Printed as "memb($Rs + #$u4_0)=#1"; no outputs.
+let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
+def V4_SS2_storebi1: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u4_0Imm:$u4_0),
+ "memb($Rs + #$u4_0)=#1"> {
+ bits<4> Rs;
+ bits<4> u4_0;
+ let Inst{12-8} = 0b10011; // fixed encoding bits
+ let Inst{7-4} = Rs; // base register field
+ let Inst{3-0} = u4_0; // all four offset bits are encoded
+ }
+// SA1_inc: Inc. Sub-instruction printed as "$Rd = add($Rs, #1)".
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_inc: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = add($Rs, #1)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ let Inst{12-8} = 0b10001; // fixed encoding bits
+ let Inst{3-0} = Rd; // output register field
+ let Inst{7-4} = Rs; // input register field
+ }
+// SS2_stored_sp: Store dword. Printed as "memd(r29 + #$s6_3) = $Rtt"; reads R29 (Uses).
+let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in
+def V4_SS2_stored_sp: SUBInst <
+ (outs ),
+ (ins s6_3Imm:$s6_3, DoubleRegs:$Rtt),
+ "memd(r29 + #$s6_3) = $Rtt"> {
+ bits<9> s6_3;
+ bits<3> Rtt;
+ let Inst{12-9} = 0b0101; // fixed encoding bits
+ let Inst{8-3} = s6_3{8-3}; // only bits 8-3 of the offset are encoded; bits 2-0 are dropped
+ let Inst{2-0} = Rtt; // source register-pair field
+ }
+// SS2_storew_sp: Store word. Printed as "memw(r29 + #$u5_2) = $Rt"; reads R29 (Uses).
+let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
+def V4_SS2_storew_sp: SUBInst <
+ (outs ),
+ (ins u5_2Imm:$u5_2, IntRegs:$Rt),
+ "memw(r29 + #$u5_2) = $Rt"> {
+ bits<7> u5_2;
+ bits<4> Rt;
+ let Inst{12-9} = 0b0100; // fixed encoding bits
+ let Inst{8-4} = u5_2{6-2}; // only bits 6-2 of the offset are encoded; bits 1-0 are dropped
+ let Inst{3-0} = Rt; // source register field
+ }
+// SL2_jumpr31_fnew: Indirect conditional jump if false.
+// Predicated on the negated new value of p0 (isPredicatedFalse and
+// isPredicatedNew are set, P0 is in Uses). The asm string must spell out the
+// predicate operand so the "if (...)" parentheses balance.
+let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
+def V4_SL2_jumpr31_fnew: SUBInst <
+ (outs ),
+ (ins ),
+ "if (! p0.new) jumpr:nt r31"> {
+ let Inst{12-6} = 0b1111111; // fixed encoding bits
+ let Inst{2-0} = 0b111; // fixed encoding bits
+ }
+// SA1_clrt: Clear if true. Printed as "if (p0) $Rd = #0"; reads P0 (Uses).
+// SA1_clrt -> SA1_clrtnew
+let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_clrt: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins ),
+ "if (p0) $Rd = #0"> {
+ bits<4> Rd;
+ let Inst{12-9} = 0b1101; // fixed encoding bits
+ let Inst{6-4} = 0b110; // fixed encoding bits
+ let Inst{3-0} = Rd; // output register field
+ }
+// SL2_return: Deallocate stack frame and return. Printed as "dealloc_return"; no explicit operands.
+let Defs = [PC, R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
+def V4_SL2_return: SUBInst <
+ (outs ),
+ (ins ),
+ "dealloc_return"> {
+ let Inst{12-6} = 0b1111101; // fixed encoding bits
+ let Inst{2} = 0b0; // fixed encoding bit
+ }
+// SA1_dec: Dec. Sub-instruction printed as "$Rd = add($Rs,#-1)".
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_dec: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = add($Rs,#-1)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ let Inst{12-8} = 0b10011; // fixed encoding bits
+ let Inst{3-0} = Rd; // output register field
+ let Inst{7-4} = Rs; // input register field
+ }
+// SA1_seti: Set immed. Printed as "$Rd = #$u6"; operand 1 is marked extendable with 6 unsigned extent bits.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 0, opExtentBits = 6, opExtendable = 1 in
+def V4_SA1_seti: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins u6Ext:$u6),
+ "$Rd = #$u6"> {
+ bits<4> Rd;
+ bits<6> u6;
+ let Inst{12-10} = 0b010; // fixed encoding bits
+ let Inst{3-0} = Rd; // output register field
+ let Inst{9-4} = u6; // 6-bit immediate operand field
+ }
+// SL2_jumpr31_t: Indirect conditional jump if true. Printed as "if (p0) jumpr r31"; reads P0 and R31 (Uses).
+// SL2_jumpr31_t -> SL2_jumpr31_tnew
+let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
+def V4_SL2_jumpr31_t: SUBInst <
+ (outs ),
+ (ins ),
+ "if (p0) jumpr r31"> {
+ let Inst{12-6} = 0b1111111; // fixed encoding bits
+ let Inst{2-0} = 0b100; // fixed encoding bits
+ }
+// SA1_clrfnew: Clear if false.
+// Predicated on the negated new value of p0 (isPredicatedFalse and
+// isPredicatedNew are set, P0 is in Uses). The asm string must spell out the
+// predicate operand so the "if (...)" parentheses balance.
+let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_clrfnew: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins ),
+ "if (! p0.new) $Rd = #0"> {
+ bits<4> Rd;
+ let Inst{12-9} = 0b1101; // fixed encoding bits
+ let Inst{6-4} = 0b101; // fixed encoding bits
+ let Inst{3-0} = Rd; // output register field
+ }
+// SS1_storew_io: Store word. Printed as "memw($Rs + #$u4_2) = $Rt"; no outputs.
+let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
+def V4_SS1_storew_io: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u4_2Imm:$u4_2, IntRegs:$Rt),
+ "memw($Rs + #$u4_2) = $Rt"> {
+ bits<4> Rs;
+ bits<6> u4_2;
+ bits<4> Rt;
+ let Inst{12} = 0b0; // fixed encoding bit
+ let Inst{7-4} = Rs; // base register field
+ let Inst{11-8} = u4_2{5-2}; // only bits 5-2 of the offset are encoded; bits 1-0 are dropped
+ let Inst{3-0} = Rt; // source register field
+ }
+// SA1_zxtb: Zxtb. Sub-instruction printed as "$Rd = and($Rs, #255)".
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_zxtb: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = and($Rs, #255)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ let Inst{12-8} = 0b10111; // fixed encoding bits
+ let Inst{3-0} = Rd; // output register field
+ let Inst{7-4} = Rs; // input register field
+ }
+// SA1_addsp: Add. Printed as "$Rd = add(r29, #$u6_2)"; reads R29 (Uses).
+let Uses = [R29], isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_addsp: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins u6_2Imm:$u6_2),
+ "$Rd = add(r29, #$u6_2)"> {
+ bits<4> Rd;
+ bits<8> u6_2;
+ let Inst{12-10} = 0b011; // fixed encoding bits
+ let Inst{3-0} = Rd; // output register field
+ let Inst{9-4} = u6_2{7-2}; // only bits 7-2 of the immediate are encoded; bits 1-0 are dropped
+ }
+// SL2_loadri_sp: Load word. Printed as "$Rd = memw(r29 + #$u5_2)"; reads R29 (Uses).
+let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in
+def V4_SL2_loadri_sp: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins u5_2Imm:$u5_2),
+ "$Rd = memw(r29 + #$u5_2)"> {
+ bits<4> Rd;
+ bits<7> u5_2;
+ let Inst{12-9} = 0b1110; // fixed encoding bits
+ let Inst{3-0} = Rd; // output register field
+ let Inst{8-4} = u5_2{6-2}; // only bits 6-2 of the offset are encoded; bits 1-0 are dropped
+ }
+// SS1_storeb_io: Store byte. Printed as "memb($Rs + #$u4_0) = $Rt"; no outputs.
+let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
+def V4_SS1_storeb_io: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u4_0Imm:$u4_0, IntRegs:$Rt),
+ "memb($Rs + #$u4_0) = $Rt"> {
+ bits<4> Rs;
+ bits<4> u4_0;
+ bits<4> Rt;
+ let Inst{12} = 0b1; // fixed encoding bit
+ let Inst{7-4} = Rs; // base register field
+ let Inst{11-8} = u4_0; // all four offset bits are encoded
+ let Inst{3-0} = Rt; // source register field
+ }
+// SL2_return_tnew: Deallocate stack frame and return.
+// Predicated on the new value of p0 (isPredicatedNew is set, P0 is in Uses).
+// The asm string must spell out the predicate operand so the "if (...)"
+// parentheses balance.
+let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
+def V4_SL2_return_tnew: SUBInst <
+ (outs ),
+ (ins ),
+ "if (p0.new) dealloc_return:nt"> {
+ let Inst{12-6} = 0b1111101; // fixed encoding bits
+ let Inst{2-0} = 0b110; // fixed encoding bits
+ }
+// SL2_return_fnew: Deallocate stack frame and return.
+// Predicated on the negated new value of p0 (isPredicatedFalse and
+// isPredicatedNew are set, P0 is in Uses). The asm string must spell out the
+// predicate operand so the "if (...)" parentheses balance.
+let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
+def V4_SL2_return_fnew: SUBInst <
+ (outs ),
+ (ins ),
+ "if (! p0.new) dealloc_return:nt"> {
+ let Inst{12-6} = 0b1111101; // fixed encoding bits
+ let Inst{2-0} = 0b111; // fixed encoding bits
+ }
+// SA1_zxth: Zxth. Sub-instruction printed as "$Rd = zxth($Rs)".
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_zxth: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = zxth($Rs)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ let Inst{12-8} = 0b10110; // fixed encoding bits
+ let Inst{3-0} = Rd; // output register field
+ let Inst{7-4} = Rs; // input register field
+ }
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 535d1f9..75189b6 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -15,9 +15,12 @@
#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
#include "HexagonMachineFunctionInfo.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -28,19 +31,30 @@ static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol,
MCContext &MC = Printer.OutContext;
const MCExpr *ME;
- ME = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, MC);
+ ME = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, MC);
if (!MO.isJTI() && MO.getOffset())
- ME = MCBinaryExpr::CreateAdd(ME, MCConstantExpr::Create(MO.getOffset(), MC),
+ ME = MCBinaryExpr::createAdd(ME, MCConstantExpr::create(MO.getOffset(), MC),
return (MCOperand::createExpr(ME));
// Create an MCInst from a MachineInstr
-void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCI,
+void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB,
HexagonAsmPrinter& AP) {
- MCI.setOpcode(MI->getOpcode());
+ if(MI->getOpcode() == Hexagon::ENDLOOP0){
+ HexagonMCInstrInfo::setInnerLoop(MCB);
+ return;
+ }
+ if(MI->getOpcode() == Hexagon::ENDLOOP1){
+ HexagonMCInstrInfo::setOuterLoop(MCB);
+ return;
+ }
+ MCInst* MCI = new (AP.OutContext) MCInst;
+ MCI->setOpcode(MI->getOpcode());
+ assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) &&
+ "MCI opcode should have been set on construction");
for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) {
const MachineOperand &MO = MI->getOperand(i);
@@ -67,7 +81,7 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCI,
case MachineOperand::MO_MachineBasicBlock:
MCO = MCOperand::createExpr
- (MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(),
+ (MCSymbolRefExpr::create(MO.getMBB()->getSymbol(),
case MachineOperand::MO_GlobalAddress:
@@ -88,6 +102,7 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCI,
- MCI.addOperand(MCO);
+ MCI->addOperand(MCO);
+ MCB.addOperand(MCOperand::createInst(MCI));
diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td
index b7f364e..be8204b 100644
--- a/lib/Target/Hexagon/HexagonOperands.td
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -27,6 +27,7 @@ let PrintMethod = "printImmOperand" in {
def s8Imm : Operand<i32>;
def s8Imm64 : Operand<i64>;
def s6Imm : Operand<i32>;
+ def s6_3Imm : Operand<i32>;
def s4Imm : Operand<i32>;
def s4_0Imm : Operand<i32>;
def s4_1Imm : Operand<i32>;
@@ -51,8 +52,14 @@ let PrintMethod = "printImmOperand" in {
def u6_2Imm : Operand<i32>;
def u6_3Imm : Operand<i32>;
def u5Imm : Operand<i32>;
+ def u5_2Imm : Operand<i32>;
+ def u5_3Imm : Operand<i32>;
def u4Imm : Operand<i32>;
+ def u4_0Imm : Operand<i32>;
+ def u4_2Imm : Operand<i32>;
def u3Imm : Operand<i32>;
+ def u3_0Imm : Operand<i32>;
+ def u3_1Imm : Operand<i32>;
def u2Imm : Operand<i32>;
def u1Imm : Operand<i32>;
def n8Imm : Operand<i32>;
@@ -444,6 +451,7 @@ let PrintMethod = "printExtOperand" in {
def s10Ext : Operand<i32>;
def s9Ext : Operand<i32>;
def s8Ext : Operand<i32>;
+ def s7Ext : Operand<i32>;
def s6Ext : Operand<i32>;
def s11_0Ext : Operand<i32>;
def s11_1Ext : Operand<i32>;
diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
index 4c987ed..6253686 100644
--- a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
@@ -4,8 +4,12 @@ add_llvm_library(LLVMHexagonDesc
+ HexagonMCCompound.cpp
+ HexagonMCDuplexInfo.cpp
+ HexagonMCShuffler.cpp
+ HexagonShuffler.cpp
add_dependencies(LLVMHexagonDesc HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 155aa9e..7689484 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -7,19 +7,150 @@
+#include "Hexagon.h"
+#include "HexagonFixupKinds.h"
#include "HexagonMCTargetDesc.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
+using namespace Hexagon;
namespace {
class HexagonAsmBackend : public MCAsmBackend {
+ uint8_t OSABI;
+ StringRef CPU;
+ mutable uint64_t relaxedCnt;
+ std::unique_ptr <MCInstrInfo> MCII;
+ std::unique_ptr <MCInst *> RelaxTarget;
- HexagonAsmBackend(Target const & /*T*/) {}
+ /// Build the backend for target \p T.
+ /// \p OSABI and \p CPU are stored and later handed to the ELF object writer.
+ /// Every member is initialised here, in declaration order: the parameter
+ /// named CPU shadows the member, so the member must be set explicitly, and
+ /// relaxedCnt is incremented later and therefore has to start from zero.
+ HexagonAsmBackend(Target const &T, uint8_t OSABI, StringRef CPU) :
+ OSABI(OSABI), CPU(CPU), relaxedCnt(0), MCII (T.createMCInstrInfo()),
+ RelaxTarget(new MCInst *){}
- unsigned getNumFixupKinds() const override { return 0; }
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { // returns a Hexagon ELF writer on OS
+ return createHexagonELFObjectWriter(OS, OSABI, CPU); // forwards the OSABI and CPU members
+ }
+ unsigned getNumFixupKinds() const override { // number of target-specific fixup kinds
+ return Hexagon::NumTargetFixupKinds; // also the size of the Infos table in getFixupKindInfo
+ }
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { // generic kinds go to the base class, target kinds to the table below
+ const static MCFixupKindInfo Infos[Hexagon::NumTargetFixupKinds] = {
+ // This table *must* be in the same order as the fixup_* kinds in
+ // HexagonFixupKinds.h, because it is indexed by (Kind - FirstTargetFixupKind).
+ //
+ // name offset bits flags
+ {"fixup_Hexagon_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B15_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B7_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_LO16", 0, 32, 0},
+ {"fixup_Hexagon_HI16", 0, 32, 0},
+ {"fixup_Hexagon_32", 0, 32, 0},
+ {"fixup_Hexagon_16", 0, 32, 0},
+ {"fixup_Hexagon_8", 0, 32, 0},
+ {"fixup_Hexagon_GPREL16_0", 0, 32, 0},
+ {"fixup_Hexagon_GPREL16_1", 0, 32, 0},
+ {"fixup_Hexagon_GPREL16_2", 0, 32, 0},
+ {"fixup_Hexagon_GPREL16_3", 0, 32, 0},
+ {"fixup_Hexagon_HL16", 0, 32, 0},
+ {"fixup_Hexagon_B13_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B9_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B32_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_B22_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B15_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B13_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B9_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B7_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_16_X", 0, 32, 0},
+ {"fixup_Hexagon_12_X", 0, 32, 0},
+ {"fixup_Hexagon_11_X", 0, 32, 0},
+ {"fixup_Hexagon_10_X", 0, 32, 0},
+ {"fixup_Hexagon_9_X", 0, 32, 0},
+ {"fixup_Hexagon_8_X", 0, 32, 0},
+ {"fixup_Hexagon_7_X", 0, 32, 0},
+ {"fixup_Hexagon_6_X", 0, 32, 0},
+ {"fixup_Hexagon_32_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_COPY", 0, 32, 0},
+ {"fixup_Hexagon_GLOB_DAT", 0, 32, 0},
+ {"fixup_Hexagon_JMP_SLOT", 0, 32, 0},
+ {"fixup_Hexagon_RELATIVE", 0, 32, 0},
+ {"fixup_Hexagon_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_GOTREL_LO16", 0, 32, 0},
+ {"fixup_Hexagon_GOTREL_HI16", 0, 32, 0},
+ {"fixup_Hexagon_GOTREL_32", 0, 32, 0},
+ {"fixup_Hexagon_GOT_LO16", 0, 32, 0},
+ {"fixup_Hexagon_GOT_HI16", 0, 32, 0},
+ {"fixup_Hexagon_GOT_32", 0, 32, 0},
+ {"fixup_Hexagon_GOT_16", 0, 32, 0},
+ {"fixup_Hexagon_DTPMOD_32", 0, 32, 0},
+ {"fixup_Hexagon_DTPREL_LO16", 0, 32, 0},
+ {"fixup_Hexagon_DTPREL_HI16", 0, 32, 0},
+ {"fixup_Hexagon_DTPREL_32", 0, 32, 0},
+ {"fixup_Hexagon_DTPREL_16", 0, 32, 0},
+ {"fixup_Hexagon_GD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_LD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_GD_GOT_LO16", 0, 32, 0},
+ {"fixup_Hexagon_GD_GOT_HI16", 0, 32, 0},
+ {"fixup_Hexagon_GD_GOT_32", 0, 32, 0},
+ {"fixup_Hexagon_GD_GOT_16", 0, 32, 0},
+ {"fixup_Hexagon_LD_GOT_LO16", 0, 32, 0},
+ {"fixup_Hexagon_LD_GOT_HI16", 0, 32, 0},
+ {"fixup_Hexagon_LD_GOT_32", 0, 32, 0},
+ {"fixup_Hexagon_LD_GOT_16", 0, 32, 0},
+ {"fixup_Hexagon_IE_LO16", 0, 32, 0},
+ {"fixup_Hexagon_IE_HI16", 0, 32, 0},
+ {"fixup_Hexagon_IE_32", 0, 32, 0},
+ {"fixup_Hexagon_IE_16", 0, 32, 0},
+ {"fixup_Hexagon_IE_GOT_LO16", 0, 32, 0},
+ {"fixup_Hexagon_IE_GOT_HI16", 0, 32, 0},
+ {"fixup_Hexagon_IE_GOT_32", 0, 32, 0},
+ {"fixup_Hexagon_IE_GOT_16", 0, 32, 0},
+ {"fixup_Hexagon_TPREL_LO16", 0, 32, 0},
+ {"fixup_Hexagon_TPREL_HI16", 0, 32, 0},
+ {"fixup_Hexagon_TPREL_32", 0, 32, 0},
+ {"fixup_Hexagon_TPREL_16", 0, 32, 0},
+ {"fixup_Hexagon_6_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_GOTREL_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_GOTREL_16_X", 0, 32, 0},
+ {"fixup_Hexagon_GOTREL_11_X", 0, 32, 0},
+ {"fixup_Hexagon_GOT_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_GOT_16_X", 0, 32, 0},
+ {"fixup_Hexagon_GOT_11_X", 0, 32, 0},
+ {"fixup_Hexagon_DTPREL_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_DTPREL_16_X", 0, 32, 0},
+ {"fixup_Hexagon_DTPREL_11_X", 0, 32, 0},
+ {"fixup_Hexagon_GD_GOT_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_GD_GOT_16_X", 0, 32, 0},
+ {"fixup_Hexagon_GD_GOT_11_X", 0, 32, 0},
+ {"fixup_Hexagon_LD_GOT_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_LD_GOT_16_X", 0, 32, 0},
+ {"fixup_Hexagon_LD_GOT_11_X", 0, 32, 0},
+ {"fixup_Hexagon_IE_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_IE_16_X", 0, 32, 0},
+ {"fixup_Hexagon_IE_GOT_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_IE_GOT_16_X", 0, 32, 0},
+ {"fixup_Hexagon_IE_GOT_11_X", 0, 32, 0},
+ {"fixup_Hexagon_TPREL_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_TPREL_16_X", 0, 32, 0},
+ {"fixup_Hexagon_TPREL_11_X", 0, 32, 0}};
+ if (Kind < FirstTargetFixupKind) { // not a target-specific kind: defer to MCAsmBackend
+ return MCAsmBackend::getFixupKindInfo(Kind);
+ }
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind]; // table index is relative to the first target kind
+ }
void applyFixup(MCFixup const & /*Fixup*/, char * /*Data*/,
unsigned /*DataSize*/, uint64_t /*Value*/,
@@ -27,14 +158,119 @@ public:
- bool mayNeedRelaxation(MCInst const & /*Inst*/) const override {
+ bool isInstRelaxable(MCInst const &HMI) const { // true only for an extendable TypeJ, branching TypeNV, or TypeCR (other than C4_addipc) instruction
+ const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(*MCII, HMI);
+ bool Relaxable = false;
+ // Branches and loop-setup insns are handled as necessary by relaxation.
+ if (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeJ ||
+ (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeNV &&
+ MCID.isBranch()) ||
+ (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeCR &&
+ HMI.getOpcode() != Hexagon::C4_addipc))
+ if (HexagonMCInstrInfo::isExtendable(*MCII, HMI)) // the type test alone is not enough; the instruction must also be extendable
+ Relaxable = true;
+ return Relaxable;
+ }
+ /// mayNeedRelaxation - Check whether the given bundle contains an
+ /// instruction that may need relaxation.
+ ///
+ /// \param Inst - The bundle to test; asserted to be a bundle.
+ bool mayNeedRelaxation(MCInst const &Inst) const override {
+ assert(HexagonMCInstrInfo::isBundle(Inst));
+ bool PreviousIsExtender = false;
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(Inst)) {
+ auto const &Inst = *I.getInst(); // shadows the parameter: this is one instruction of the bundle
+ if (!PreviousIsExtender) { // an instruction directly after an immext is not considered
+ if (isInstRelaxable(Inst))
+ return true;
+ }
+ PreviousIsExtender = HexagonMCInstrInfo::isImmext(Inst);
+ }
return false;
- bool fixupNeedsRelaxation(MCFixup const & /*Fixup*/, uint64_t /*Value*/,
- MCRelaxableFragment const * /*DF*/,
- MCAsmLayout const & /*Layout*/) const override {
- llvm_unreachable("fixupNeedsRelaxation() unimplemented");
+ /// fixupNeedsRelaxationAdvanced - Target specific predicate for whether a
+ /// given fixup requires the associated instruction to be relaxed.
+ bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ MCInst const &MCB = DF->getInst();
+ assert(HexagonMCInstrInfo::isBundle(MCB));
+ *RelaxTarget = nullptr; // cleared on every query; set again only when this call returns true
+ MCInst &MCI = const_cast<MCInst &>(HexagonMCInstrInfo::instruction(
+ MCB, Fixup.getOffset() / HEXAGON_INSTR_SIZE)); // the fixup offset selects the instruction inside the bundle
+ // If we cannot resolve the fixup value, it requires relaxation.
+ if (!Resolved) {
+ switch ((unsigned)Fixup.getKind()) {
+ case fixup_Hexagon_B22_PCREL:
+ // GetFixupCount assumes B22 won't relax
+ // Fallthrough
+ default:
+ return false;
+ break;
+ case fixup_Hexagon_B13_PCREL:
+ case fixup_Hexagon_B15_PCREL:
+ case fixup_Hexagon_B9_PCREL:
+ case fixup_Hexagon_B7_PCREL: {
+ if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) { // relax only while the bundle is below the packet size limit
+ ++relaxedCnt;
+ *RelaxTarget = &MCI;
+ return true;
+ } else {
+ return false;
+ }
+ break;
+ }
+ }
+ }
+ bool Relaxable = isInstRelaxable(MCI);
+ if (Relaxable == false)
+ return false;
+ MCFixupKind Kind = Fixup.getKind();
+ int64_t sValue = Value; // reinterpret the fixup value as signed for the range check
+ int64_t maxValue;
+ switch ((unsigned)Kind) { // NOTE(review): B13_PCREL has no case here and takes the default bound - confirm this is intended
+ case fixup_Hexagon_B7_PCREL:
+ maxValue = 1 << 8;
+ break;
+ case fixup_Hexagon_B9_PCREL:
+ maxValue = 1 << 10;
+ break;
+ case fixup_Hexagon_B15_PCREL:
+ maxValue = 1 << 16;
+ break;
+ case fixup_Hexagon_B22_PCREL:
+ maxValue = 1 << 23;
+ break;
+ default:
+ maxValue = INT64_MAX; // effectively no upper limit for the remaining kinds
+ break;
+ }
+ bool isFarAway = -maxValue > sValue || sValue > maxValue - 1; // outside [-maxValue, maxValue - 1]
+ if (isFarAway) {
+ if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) { // same size check as the unresolved path
+ ++relaxedCnt;
+ *RelaxTarget = &MCI;
+ return true;
+ }
+ }
+ return false;
+ }
+ /// Simple predicate for targets where !Resolved implies requiring relaxation.
+ /// Not expected to be called on this target; it aborts if reached.
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced");
void relaxInstruction(MCInst const & /*Inst*/,
@@ -49,26 +285,11 @@ public:
} // end anonymous namespace
-namespace {
-class ELFHexagonAsmBackend : public HexagonAsmBackend {
- uint8_t OSABI;
- ELFHexagonAsmBackend(Target const &T, uint8_t OSABI)
- : HexagonAsmBackend(T), OSABI(OSABI) {}
- MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
- StringRef CPU("HexagonV4");
- return createHexagonELFObjectWriter(OS, OSABI, CPU);
- }
-} // end anonymous namespace
namespace llvm {
MCAsmBackend *createHexagonAsmBackend(Target const &T,
MCRegisterInfo const & /*MRI*/,
- StringRef TT, StringRef /*CPU*/) {
+ StringRef TT, StringRef CPU) {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
- return new ELFHexagonAsmBackend(T, OSABI);
+ return new HexagonAsmBackend(T, OSABI, CPU);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 6a72f20..f4d162c 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -43,6 +43,7 @@ namespace HexagonII {
TypeXTYPE = 8,
TypeMEMOP = 9,
TypeNV = 10,
+ TypeDUPLEX = 11,
TypePREFIX = 30, // Such as extenders.
TypeENDLOOP = 31 // Such as end of a HW loop.
@@ -190,7 +191,26 @@ namespace HexagonII {
- enum class InstParseBits : uint32_t {
+ // Hexagon Sub-instruction classes. HSIG_None is 0; the rest count up from it.
+ enum SubInstructionGroup {
+ HSIG_None = 0,
+ HSIG_L1,
+ HSIG_L2,
+ HSIG_S1,
+ HSIG_S2,
+ HSIG_Compound
+ };
+ // Hexagon Compound classes. HCG_None is 0; HCG_A and HCG_B follow it.
+ enum CompoundGroup {
+ HCG_None = 0,
+ HCG_A,
+ HCG_B,
+ };
+ enum InstParseBits {
INST_PARSE_MASK = 0x0000c000,
INST_PARSE_LOOP_END = 0x00008000,
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index fde935b..8430723 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -8,6 +8,7 @@
#include "Hexagon.h"
+#include "MCTargetDesc/HexagonFixupKinds.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/Support/Debug.h"
@@ -40,17 +41,306 @@ HexagonELFObjectWriter::HexagonELFObjectWriter(uint8_t OSABI, StringRef C)
unsigned HexagonELFObjectWriter::GetRelocType(MCValue const &/*Target*/,
MCFixup const &Fixup,
bool IsPCRel) const {
+ // determine the type of the relocation
unsigned Type = (unsigned)ELF::R_HEX_NONE;
- llvm::MCFixupKind Kind = Fixup.getKind();
+ unsigned Kind = (unsigned)Fixup.getKind();
switch (Kind) {
- default:
- DEBUG(dbgs() << "unrecognized relocation " << Fixup.getKind() << "\n");
- llvm_unreachable("Unimplemented Fixup kind!");
- break;
- case FK_Data_4:
- Type = (IsPCRel) ? ELF::R_HEX_32_PCREL : ELF::R_HEX_32;
- break;
+ default:
+ DEBUG(dbgs() << "unrecognized relocation " << Fixup.getKind() << "\n");
+ llvm_unreachable("Unimplemented Fixup kind!");
+ break;
+ case FK_Data_4:
+ Type = (IsPCRel) ? ELF::R_HEX_32_PCREL : ELF::R_HEX_32;
+ break;
+ case FK_PCRel_4:
+ Type = ELF::R_HEX_32_PCREL;
+ break;
+ case FK_Data_2:
+ Type = ELF::R_HEX_16;
+ break;
+ case FK_Data_1:
+ Type = ELF::R_HEX_8;
+ break;
+ case fixup_Hexagon_B22_PCREL:
+ Type = ELF::R_HEX_B22_PCREL;
+ break;
+ case fixup_Hexagon_B15_PCREL:
+ Type = ELF::R_HEX_B15_PCREL;
+ break;
+ case fixup_Hexagon_B7_PCREL:
+ Type = ELF::R_HEX_B7_PCREL;
+ break;
+ case fixup_Hexagon_LO16:
+ Type = ELF::R_HEX_LO16;
+ break;
+ case fixup_Hexagon_HI16:
+ Type = ELF::R_HEX_HI16;
+ break;
+ case fixup_Hexagon_32:
+ Type = ELF::R_HEX_32;
+ break;
+ case fixup_Hexagon_16:
+ Type = ELF::R_HEX_16;
+ break;
+ case fixup_Hexagon_8:
+ Type = ELF::R_HEX_8;
+ break;
+ case fixup_Hexagon_GPREL16_0:
+ Type = ELF::R_HEX_GPREL16_0;
+ break;
+ case fixup_Hexagon_GPREL16_1:
+ Type = ELF::R_HEX_GPREL16_1;
+ break;
+ case fixup_Hexagon_GPREL16_2:
+ Type = ELF::R_HEX_GPREL16_2;
+ break;
+ case fixup_Hexagon_GPREL16_3:
+ Type = ELF::R_HEX_GPREL16_3;
+ break;
+ case fixup_Hexagon_HL16:
+ Type = ELF::R_HEX_HL16;
+ break;
+ case fixup_Hexagon_B13_PCREL:
+ Type = ELF::R_HEX_B13_PCREL;
+ break;
+ case fixup_Hexagon_B9_PCREL:
+ Type = ELF::R_HEX_B9_PCREL;
+ break;
+ case fixup_Hexagon_B32_PCREL_X:
+ Type = ELF::R_HEX_B32_PCREL_X;
+ break;
+ case fixup_Hexagon_32_6_X:
+ Type = ELF::R_HEX_32_6_X;
+ break;
+ case fixup_Hexagon_B22_PCREL_X:
+ Type = ELF::R_HEX_B22_PCREL_X;
+ break;
+ case fixup_Hexagon_B15_PCREL_X:
+ Type = ELF::R_HEX_B15_PCREL_X;
+ break;
+ case fixup_Hexagon_B13_PCREL_X:
+ Type = ELF::R_HEX_B13_PCREL_X;
+ break;
+ case fixup_Hexagon_B9_PCREL_X:
+ Type = ELF::R_HEX_B9_PCREL_X;
+ break;
+ case fixup_Hexagon_B7_PCREL_X:
+ Type = ELF::R_HEX_B7_PCREL_X;
+ break;
+ case fixup_Hexagon_16_X:
+ Type = ELF::R_HEX_16_X;
+ break;
+ case fixup_Hexagon_12_X:
+ Type = ELF::R_HEX_12_X;
+ break;
+ case fixup_Hexagon_11_X:
+ Type = ELF::R_HEX_11_X;
+ break;
+ case fixup_Hexagon_10_X:
+ Type = ELF::R_HEX_10_X;
+ break;
+ case fixup_Hexagon_9_X:
+ Type = ELF::R_HEX_9_X;
+ break;
+ case fixup_Hexagon_8_X:
+ Type = ELF::R_HEX_8_X;
+ break;
+ case fixup_Hexagon_7_X:
+ Type = ELF::R_HEX_7_X;
+ break;
+ case fixup_Hexagon_6_X:
+ Type = ELF::R_HEX_6_X;
+ break;
+ case fixup_Hexagon_32_PCREL:
+ Type = ELF::R_HEX_32_PCREL;
+ break;
+ case fixup_Hexagon_COPY:
+ Type = ELF::R_HEX_COPY;
+ break;
+ case fixup_Hexagon_GLOB_DAT:
+ break;
+ case fixup_Hexagon_JMP_SLOT:
+ break;
+ case fixup_Hexagon_RELATIVE:
+ break;
+ case fixup_Hexagon_PLT_B22_PCREL:
+ Type = ELF::R_HEX_PLT_B22_PCREL;
+ break;
+ case fixup_Hexagon_GOTREL_LO16:
+ Type = ELF::R_HEX_GOTREL_LO16;
+ break;
+ case fixup_Hexagon_GOTREL_HI16:
+ Type = ELF::R_HEX_GOTREL_HI16;
+ break;
+ case fixup_Hexagon_GOTREL_32:
+ Type = ELF::R_HEX_GOTREL_32;
+ break;
+ case fixup_Hexagon_GOT_LO16:
+ Type = ELF::R_HEX_GOT_LO16;
+ break;
+ case fixup_Hexagon_GOT_HI16:
+ Type = ELF::R_HEX_GOT_HI16;
+ break;
+ case fixup_Hexagon_GOT_32:
+ Type = ELF::R_HEX_GOT_32;
+ break;
+ case fixup_Hexagon_GOT_16:
+ Type = ELF::R_HEX_GOT_16;
+ break;
+ case fixup_Hexagon_DTPMOD_32:
+ Type = ELF::R_HEX_DTPMOD_32;
+ break;
+ case fixup_Hexagon_DTPREL_LO16:
+ Type = ELF::R_HEX_DTPREL_LO16;
+ break;
+ case fixup_Hexagon_DTPREL_HI16:
+ Type = ELF::R_HEX_DTPREL_HI16;
+ break;
+ case fixup_Hexagon_DTPREL_32:
+ Type = ELF::R_HEX_DTPREL_32;
+ break;
+ case fixup_Hexagon_DTPREL_16:
+ Type = ELF::R_HEX_DTPREL_16;
+ break;
+ case fixup_Hexagon_GD_PLT_B22_PCREL:
+ break;
+ case fixup_Hexagon_LD_PLT_B22_PCREL:
+ break;
+ case fixup_Hexagon_GD_GOT_LO16:
+ Type = ELF::R_HEX_GD_GOT_LO16;
+ break;
+ case fixup_Hexagon_GD_GOT_HI16:
+ Type = ELF::R_HEX_GD_GOT_HI16;
+ break;
+ case fixup_Hexagon_GD_GOT_32:
+ Type = ELF::R_HEX_GD_GOT_32;
+ break;
+ case fixup_Hexagon_GD_GOT_16:
+ Type = ELF::R_HEX_GD_GOT_16;
+ break;
+ case fixup_Hexagon_LD_GOT_LO16:
+ Type = ELF::R_HEX_LD_GOT_LO16;
+ break;
+ case fixup_Hexagon_LD_GOT_HI16:
+ Type = ELF::R_HEX_LD_GOT_HI16;
+ break;
+ case fixup_Hexagon_LD_GOT_32:
+ Type = ELF::R_HEX_LD_GOT_32;
+ break;
+ case fixup_Hexagon_LD_GOT_16:
+ Type = ELF::R_HEX_LD_GOT_16;
+ break;
+ case fixup_Hexagon_IE_LO16:
+ Type = ELF::R_HEX_IE_LO16;
+ break;
+ case fixup_Hexagon_IE_HI16:
+ Type = ELF::R_HEX_IE_HI16;
+ break;
+ case fixup_Hexagon_IE_32:
+ Type = ELF::R_HEX_IE_32;
+ break;
+ case fixup_Hexagon_IE_GOT_LO16:
+ Type = ELF::R_HEX_IE_GOT_LO16;
+ break;
+ case fixup_Hexagon_IE_GOT_HI16:
+ Type = ELF::R_HEX_IE_GOT_HI16;
+ break;
+ case fixup_Hexagon_IE_GOT_32:
+ Type = ELF::R_HEX_IE_GOT_32;
+ break;
+ case fixup_Hexagon_IE_GOT_16:
+ Type = ELF::R_HEX_IE_GOT_16;
+ break;
+ case fixup_Hexagon_TPREL_LO16:
+ Type = ELF::R_HEX_TPREL_LO16;
+ break;
+ case fixup_Hexagon_TPREL_HI16:
+ Type = ELF::R_HEX_TPREL_HI16;
+ break;
+ case fixup_Hexagon_TPREL_32:
+ Type = ELF::R_HEX_TPREL_32;
+ break;
+ case fixup_Hexagon_TPREL_16:
+ Type = ELF::R_HEX_TPREL_16;
+ break;
+ case fixup_Hexagon_6_PCREL_X:
+ Type = ELF::R_HEX_6_PCREL_X;
+ break;
+ case fixup_Hexagon_GOTREL_32_6_X:
+ Type = ELF::R_HEX_GOTREL_32_6_X;
+ break;
+ case fixup_Hexagon_GOTREL_16_X:
+ Type = ELF::R_HEX_GOTREL_16_X;
+ break;
+ case fixup_Hexagon_GOTREL_11_X:
+ Type = ELF::R_HEX_GOTREL_11_X;
+ break;
+ case fixup_Hexagon_GOT_32_6_X:
+ Type = ELF::R_HEX_GOT_32_6_X;
+ break;
+ case fixup_Hexagon_GOT_16_X:
+ Type = ELF::R_HEX_GOT_16_X;
+ break;
+ case fixup_Hexagon_GOT_11_X:
+ Type = ELF::R_HEX_GOT_11_X;
+ break;
+ case fixup_Hexagon_DTPREL_32_6_X:
+ Type = ELF::R_HEX_DTPREL_32_6_X;
+ break;
+ case fixup_Hexagon_DTPREL_16_X:
+ Type = ELF::R_HEX_DTPREL_16_X;
+ break;
+ case fixup_Hexagon_DTPREL_11_X:
+ Type = ELF::R_HEX_DTPREL_11_X;
+ break;
+ case fixup_Hexagon_GD_GOT_32_6_X:
+ Type = ELF::R_HEX_GD_GOT_32_6_X;
+ break;
+ case fixup_Hexagon_GD_GOT_16_X:
+ Type = ELF::R_HEX_GD_GOT_16_X;
+ break;
+ case fixup_Hexagon_GD_GOT_11_X:
+ Type = ELF::R_HEX_GD_GOT_11_X;
+ break;
+ case fixup_Hexagon_LD_GOT_32_6_X:
+ Type = ELF::R_HEX_LD_GOT_32_6_X;
+ break;
+ case fixup_Hexagon_LD_GOT_16_X:
+ Type = ELF::R_HEX_LD_GOT_16_X;
+ break;
+ case fixup_Hexagon_LD_GOT_11_X:
+ Type = ELF::R_HEX_LD_GOT_11_X;
+ break;
+ case fixup_Hexagon_IE_32_6_X:
+ Type = ELF::R_HEX_IE_32_6_X;
+ break;
+ case fixup_Hexagon_IE_16_X:
+ Type = ELF::R_HEX_IE_16_X;
+ break;
+ case fixup_Hexagon_IE_GOT_32_6_X:
+ Type = ELF::R_HEX_IE_GOT_32_6_X;
+ break;
+ case fixup_Hexagon_IE_GOT_16_X:
+ Type = ELF::R_HEX_IE_GOT_16_X;
+ break;
+ case fixup_Hexagon_IE_GOT_11_X:
+ Type = ELF::R_HEX_IE_GOT_11_X;
+ break;
+ case fixup_Hexagon_TPREL_32_6_X:
+ Type = ELF::R_HEX_TPREL_32_6_X;
+ break;
+ case fixup_Hexagon_TPREL_16_X:
+ Type = ELF::R_HEX_TPREL_16_X;
+ break;
+ case fixup_Hexagon_TPREL_11_X:
+ Type = ELF::R_HEX_TPREL_11_X;
+ break;
return Type;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 15cda71..36f8146 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -28,7 +28,47 @@ using namespace llvm;
#include "HexagonGenAsmWriter.inc"
-const char HexagonInstPrinter::PacketPadding = '\t';
+HexagonAsmInstPrinter::HexagonAsmInstPrinter(MCInstPrinter *RawPrinter) // wraps RawPrinter; dereferenced here, so it must not be null
+ : MCInstPrinter(*RawPrinter), RawPrinter(RawPrinter) {} // base class is copy-constructed from the wrapped printer
+void HexagonAsmInstPrinter::printInst(MCInst const *MI, raw_ostream &O, // prints a bundle as a "{ ... }" packet
+ StringRef Annot,
+ MCSubtargetInfo const &STI) {
+ assert(HexagonMCInstrInfo::isBundle(*MI));
+ assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE);
+ std::string Buffer;
+ {
+ raw_string_ostream TempStream(Buffer); // scoped so the stream is destroyed before Buffer is read
+ RawPrinter->printInst(MI, TempStream, "", STI); // Annot is not forwarded; an empty annotation is passed
+ }
+ StringRef Contents(Buffer);
+ auto PacketBundle = Contents.rsplit('\n'); // first: instruction lines, second: text after the last newline
+ auto HeadTail = PacketBundle.first.split('\n'); // first: current line, second: remaining lines
+ auto Preamble = "\t{\n\t\t"; // packet opener, emitted before the first instruction only
+ auto Separator = "";
+ while(!HeadTail.first.empty()) {
+ O << Separator;
+ StringRef Inst;
+ auto Duplex = HeadTail.first.split('\v'); // a '\v' inside a line separates two parts of that line
+ if(!Duplex.second.empty()){
+ O << Duplex.first << "\n"; // the part before '\v' goes on its own line, ahead of the preamble
+ Inst = Duplex.second;
+ }
+ else
+ Inst = Duplex.first;
+ O << Preamble;
+ O << Inst;
+ HeadTail = HeadTail.second.split('\n');
+ Preamble = ""; // only the first instruction gets the opener
+ Separator = "\n\t\t"; // later instructions start on a new indented line
+ }
+ O << "\n\t}" << PacketBundle.second; // close the packet, then append the trailing text
+void HexagonAsmInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { // pure forwarder
+ RawPrinter->printRegName(O, RegNo); // delegate to the wrapped printer
// Return the minimum value that a constant extendable operand can have
// without being extended.
static int getMinValue(uint64_t TSFlags) {
@@ -77,48 +117,44 @@ void HexagonInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
-void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O,
- StringRef Annot,
- const MCSubtargetInfo &STI) {
- const char startPacket = '{',
- endPacket = '}';
- // TODO: add outer HW loop when it's supported too.
- if (MI->getOpcode() == Hexagon::ENDLOOP0) {
- // Ending a harware loop is different from ending an regular packet.
- assert(HexagonMCInstrInfo::isPacketEnd(*MI) && "Loop-end must also end the packet");
- if (HexagonMCInstrInfo::isPacketBegin(*MI)) {
- // There must be a packet to end a loop.
- // FIXME: when shuffling is always run, this shouldn't be needed.
- MCInst Nop;
- StringRef NoAnnot;
- Nop.setOpcode (Hexagon::A2_nop);
- HexagonMCInstrInfo::setPacketBegin (Nop, HexagonMCInstrInfo::isPacketBegin(*MI));
- printInst (&Nop, O, NoAnnot, STI);
- }
+void HexagonInstPrinter::setExtender(MCInst const &MCI) {
+ HasExtender = HexagonMCInstrInfo::isImmext(MCI);
- // Close the packet.
- if (HexagonMCInstrInfo::isPacketEnd(*MI))
- O << PacketPadding << endPacket;
+void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &OS,
+ StringRef Annot,
+ MCSubtargetInfo const &STI) {
+ assert(HexagonMCInstrInfo::isBundle(*MI));
+ assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE);
+ HasExtender = false;
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) {
+ MCInst const &MCI = *I.getInst();
+ if (HexagonMCInstrInfo::isDuplex(MII, MCI)) {
+ printInstruction(MCI.getOperand(1).getInst(), OS);
+ OS << '\v';
+ HasExtender = false;
+ printInstruction(MCI.getOperand(0).getInst(), OS);
+ } else
+ printInstruction(&MCI, OS);
+ setExtender(MCI);
+ OS << "\n";
+ }
- printInstruction(MI, O);
+ auto Separator = "";
+ if (HexagonMCInstrInfo::isInnerLoop(*MI)) {
+ OS << Separator;
+ Separator = " ";
+ MCInst ME;
+ ME.setOpcode(Hexagon::ENDLOOP0);
+ printInstruction(&ME, OS);
- else {
- // Prefix the insn opening the packet.
- if (HexagonMCInstrInfo::isPacketBegin(*MI))
- O << PacketPadding << startPacket << '\n';
- printInstruction(MI, O);
- // Suffix the insn closing the packet.
- if (HexagonMCInstrInfo::isPacketEnd(*MI))
- // Suffix the packet in a new line always, since the GNU assembler has
- // issues with a closing brace on the same line as CONST{32,64}.
- O << '\n' << PacketPadding << endPacket;
+ if (HexagonMCInstrInfo::isOuterLoop(*MI)) {
+ OS << Separator;
+ Separator = " ";
+ MCInst ME;
+ ME.setOpcode(Hexagon::ENDLOOP1);
+ printInstruction(&ME, OS);
- printAnnotation(O, Annot);
void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
@@ -128,7 +164,7 @@ void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (MO.isReg()) {
printRegName(O, MO.getReg());
} else if(MO.isExpr()) {
- O << *MO.getExpr();
+ MO.getExpr()->print(O, &MAI);
} else if(MO.isImm()) {
printImmOperand(MI, OpNo, O);
} else {
@@ -141,7 +177,7 @@ void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo,
const MCOperand& MO = MI->getOperand(OpNo);
if(MO.isExpr()) {
- O << *MO.getExpr();
+ MO.getExpr()->print(O, &MAI);
} else if(MO.isImm()) {
O << MI->getOperand(OpNo).getImm();
} else {
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index 3fedaed..534ac23 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -18,6 +18,21 @@
#include "llvm/MC/MCInstrInfo.h"
namespace llvm {
+/// An MCInstPrinter that wraps another MCInstPrinter and delegates to it.
+/// The wrapped printer is held through a std::unique_ptr, so this object
+/// owns it once constructed.
+class HexagonAsmInstPrinter : public MCInstPrinter {
+ // RawPrinter is dereferenced by the constructor; it must not be null.
+ HexagonAsmInstPrinter(MCInstPrinter *RawPrinter);
+ // Prints the bundle MI to O, using the wrapped printer for the raw text.
+ void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
+ MCSubtargetInfo const &STI) override;
+ // Forwards to the wrapped printer.
+ void printRegName(raw_ostream &O, unsigned RegNo) const override;
+ // The wrapped printer that produces the raw instruction text.
+ std::unique_ptr<MCInstPrinter> RawPrinter;
+/// Prints bundles as a newline separated list of individual instructions
+/// Duplexes are separated by a vertical tab \v character
+/// A trailing line includes bundle properties such as endloop0/1
+/// r0 = add(r1, r2)
+/// r0 = #0 \v jump 0x0
+/// :endloop0 :endloop1
class HexagonInstPrinter : public MCInstPrinter {
explicit HexagonInstPrinter(MCAsmInfo const &MAI,
@@ -74,11 +89,11 @@ namespace llvm {
void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi)
- static const char PacketPadding;
const MCInstrInfo &MII;
+ bool HasExtender;
+ void setExtender(MCInst const &MCI);
} // end namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index ad5e0fb..51d2f1c 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
// Pin the vtable to this file.
void HexagonMCAsmInfo::anchor() {}
-HexagonMCAsmInfo::HexagonMCAsmInfo(StringRef TT) {
+HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) {
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
Data64bitsDirective = nullptr; // .xword is only supported by V9.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
index ab18f0b..dc07069 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -18,10 +18,12 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
+ class Triple;
class HexagonMCAsmInfo : public MCAsmInfoELF {
void anchor() override;
- explicit HexagonMCAsmInfo(StringRef TT);
+ explicit HexagonMCAsmInfo(const Triple &TT);
} // namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index ae3953a..1eee852 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "mccodeemitter"
@@ -31,38 +32,206 @@ using namespace Hexagon;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
-namespace {
-/// \brief 10.6 Instruction Packets
-/// Possible values for instruction packet parse field.
-enum class ParseField { duplex = 0x0, last0 = 0x1, last1 = 0x2, end = 0x3 };
-/// \brief Returns the packet bits based on instruction position.
-uint32_t getPacketBits(MCInst const &HMI) {
- unsigned const ParseFieldOffset = 14;
- ParseField Field = HexagonMCInstrInfo::isPacketEnd(HMI) ? ParseField::end
- : ParseField::last0;
- return static_cast<uint32_t>(Field) << ParseFieldOffset;
-void emitLittleEndian(uint64_t Binary, raw_ostream &OS) {
- OS << static_cast<uint8_t>((Binary >> 0x00) & 0xff);
- OS << static_cast<uint8_t>((Binary >> 0x08) & 0xff);
- OS << static_cast<uint8_t>((Binary >> 0x10) & 0xff);
- OS << static_cast<uint8_t>((Binary >> 0x18) & 0xff);
HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII,
MCContext &aMCT)
: MCT(aMCT), MCII(aMII), Addend(new unsigned(0)),
- Extended(new bool(false)) {}
+ Extended(new bool(false)), CurrentBundle(new MCInst const *) {}
+/// Return the parse bits for instruction \p MCI, which sits at position
+/// \p Instruction inside bundle \p MCB; \p Last is the position of the final
+/// instruction of the bundle.
+///
+/// Position 0 of an inner-loop bundle and position 1 of an outer-loop bundle
+/// get the loop-end marker. Otherwise a duplex gets the duplex marker, the
+/// last instruction gets the packet-end marker, and any other instruction
+/// gets the not-end marker.
+uint32_t HexagonMCCodeEmitter::parseBits(size_t Instruction, size_t Last,
+                                         MCInst const &MCB,
+                                         MCInst const &MCI) const {
+  bool const IsDuplex = HexagonMCInstrInfo::isDuplex(MCII, MCI);
+
+  bool const MarksLoopEnd =
+      (Instruction == 0 && HexagonMCInstrInfo::isInnerLoop(MCB)) ||
+      (Instruction == 1 && HexagonMCInstrInfo::isOuterLoop(MCB));
+  if (MarksLoopEnd) {
+    // A loop-end marker can sit neither on a duplex nor on the last word.
+    assert(!IsDuplex);
+    assert(Instruction != Last);
+    return HexagonII::INST_PARSE_LOOP_END;
+  }
+
+  if (IsDuplex) {
+    // A duplex is only expected in the final position of the bundle.
+    assert(Instruction == Last);
+    return HexagonII::INST_PARSE_DUPLEX;
+  }
+
+  return Instruction == Last ? HexagonII::INST_PARSE_PACKET_END
+                             : HexagonII::INST_PARSE_NOT_END;
+}
void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const {
- uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI) | getPacketBits(MI);
- assert(HexagonMCInstrInfo::getDesc(MCII, MI).getSize() == 4 &&
- "All instructions should be 32bit");
- (void)&MCII;
- emitLittleEndian(Binary, OS);
+ MCInst &HMB = const_cast<MCInst &>(MI);
+ assert(HexagonMCInstrInfo::isBundle(HMB));
+ DEBUG(dbgs() << "Encoding bundle\n";);
+ *Addend = 0;
+ *Extended = false;
+ *CurrentBundle = &MI;
+ size_t Instruction = 0;
+ size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1;
+ for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) {
+ MCInst &HMI = const_cast<MCInst &>(*I.getInst());
+ EncodeSingleInstruction(HMI, OS, Fixups, STI,
+ parseBits(Instruction, Last, HMB, HMI),
+ Instruction);
+ *Extended = HexagonMCInstrInfo::isImmext(HMI);
+ ++Instruction;
+ }
+ return;
+/// EncodeSingleInstruction - Emit a single, non-bundle instruction as one
+/// 32-bit little-endian word.
+///
+/// \param MI     instruction to encode. It is copied first, so rewriting the
+///               new-value operand does not modify the caller's instruction.
+/// \param OS     stream that receives the encoded word.
+/// \param Fixups passed through to getBinaryCodeForInstr.
+/// \param STI    passed through to getBinaryCodeForInstr.
+/// \param Parse  parse bits that are OR'ed into the encoding.
+/// \param Index  position of \p MI among the instructions of *CurrentBundle;
+///               used to search backwards for a new-value producer.
+void HexagonMCCodeEmitter::EncodeSingleInstruction(
+    const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
+    const MCSubtargetInfo &STI, uint32_t Parse, size_t Index) const {
+  MCInst HMB = MI;
+  assert(!HexagonMCInstrInfo::isBundle(HMB));
+  uint64_t Binary;
+
+  // Pseudo instructions don't get encoded and shouldn't be here
+  // in the first place!
+  assert(!HexagonMCInstrInfo::getDesc(MCII, HMB).isPseudo() &&
+         "pseudo-instruction found");
+  DEBUG(dbgs() << "Encoding insn"
+                  " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'"
+                  "\n");
+
+  if (HexagonMCInstrInfo::isNewValue(MCII, HMB)) {
+    // Calculate the new value distance to the associated producer
+    MCOperand &MCO =
+        HMB.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, HMB));
+    unsigned SOffset = 0;
+    unsigned Register = MCO.getReg();
+    auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
+
+    // Walk backwards over the instructions that precede this one. The walk
+    // is driven by an index that stops at zero, so no iterator before
+    // begin() is ever formed, even when Index is 0 or no producer exists.
+    bool FoundProducer = false;
+    for (size_t Pos = Index; Pos != 0;) {
+      --Pos;
+      MCInst const &Inst = *(Instructions.begin() + Pos)->getInst();
+      // Immediate extenders do not count towards the distance.
+      if (HexagonMCInstrInfo::isImmext(Inst))
+        continue;
+      ++SOffset;
+      unsigned Register1 =
+          HexagonMCInstrInfo::hasNewValue(MCII, Inst)
+              ? HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg()
+              : static_cast<unsigned>(Hexagon::NoRegister);
+      if (Register != Register1)
+        // This isn't the register we're looking for
+        continue;
+      if (!HexagonMCInstrInfo::isPredicated(MCII, Inst)) {
+        // Producer is unpredicated
+        FoundProducer = true;
+        break;
+      }
+      assert(HexagonMCInstrInfo::isPredicated(MCII, HMB) &&
+             "Unpredicated consumer depending on predicated producer");
+      if (HexagonMCInstrInfo::isPredicatedTrue(MCII, Inst) ==
+          HexagonMCInstrInfo::isPredicatedTrue(MCII, HMB)) {
+        // Producer predicate sense matched ours
+        FoundProducer = true;
+        break;
+      }
+    }
+    // Fail deterministically in every build mode instead of encoding a
+    // distance to a producer that does not exist.
+    if (!FoundProducer)
+      report_fatal_error("Couldn't find producer");
+
+    // Hexagon PRM 10.11 Construct Nt from distance
+    unsigned Offset = SOffset;
+    Offset <<= 1;
+    MCO.setReg(Offset + Hexagon::R0);
+  }
+
+  Binary = getBinaryCodeForInstr(HMB, Fixups, STI);
+  // Check for unimplemented instructions. Immediate extenders
+  // are encoded as zero, so they need to be accounted for.
+  if ((!Binary) &&
+      ((HMB.getOpcode() != DuplexIClass0) && (HMB.getOpcode() != A4_ext) &&
+       (HMB.getOpcode() != A4_ext_b) && (HMB.getOpcode() != A4_ext_c) &&
+       (HMB.getOpcode() != A4_ext_g))) {
+    // There is no fallback encoding: an all-zero result for any other
+    // opcode is treated as an unimplemented instruction.
+    DEBUG(dbgs() << "Unimplemented inst: "
+                    " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'"
+                    "\n");
+    llvm_unreachable("Unimplemented Instruction");
+  }
+  Binary |= Parse;
+
+  // if we need to emit a duplexed instruction
+  if (HMB.getOpcode() >= Hexagon::DuplexIClass0 &&
+      HMB.getOpcode() <= Hexagon::DuplexIClassF) {
+    assert(Parse == HexagonII::INST_PARSE_DUPLEX &&
+           "Emitting duplex without duplex parse bits");
+    // The position of an opcode in this list is its duplex IClass (0-15).
+    static unsigned const DuplexOpcodes[] = {
+        Hexagon::DuplexIClass0, Hexagon::DuplexIClass1, Hexagon::DuplexIClass2,
+        Hexagon::DuplexIClass3, Hexagon::DuplexIClass4, Hexagon::DuplexIClass5,
+        Hexagon::DuplexIClass6, Hexagon::DuplexIClass7, Hexagon::DuplexIClass8,
+        Hexagon::DuplexIClass9, Hexagon::DuplexIClassA, Hexagon::DuplexIClassB,
+        Hexagon::DuplexIClassC, Hexagon::DuplexIClassD, Hexagon::DuplexIClassE,
+        Hexagon::DuplexIClassF};
+    unsigned const NumDuplexClasses =
+        sizeof(DuplexOpcodes) / sizeof(DuplexOpcodes[0]);
+    unsigned dupIClass = 0;
+    while (dupIClass != NumDuplexClasses &&
+           DuplexOpcodes[dupIClass] != HMB.getOpcode())
+      ++dupIClass;
+    if (dupIClass == NumDuplexClasses)
+      llvm_unreachable("Unimplemented DuplexIClass");
+
+    // 29 is the bit position.
+    // 0b1110 =0xE bits are masked off and down shifted by 1 bit.
+    // Last bit is moved to bit position 13
+    Binary = ((dupIClass & 0xE) << (29 - 1)) | ((dupIClass & 0x1) << 13);
+
+    const MCInst *subInst0 = HMB.getOperand(0).getInst();
+    const MCInst *subInst1 = HMB.getOperand(1).getInst();
+
+    // get subinstruction slot 0
+    unsigned subInstSlot0Bits = getBinaryCodeForInstr(*subInst0, Fixups, STI);
+    // get subinstruction slot 1
+    unsigned subInstSlot1Bits = getBinaryCodeForInstr(*subInst1, Fixups, STI);
+
+    Binary |= subInstSlot0Bits | (subInstSlot1Bits << 16);
+  }
+  support::endian::Writer<support::little>(OS).write<uint32_t>(Binary);
+}
@@ -182,7 +351,7 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
int64_t Res;
- if (ME->EvaluateAsAbsolute(Res))
+ if (ME->evaluateAsAbsolute(Res))
return Res;
MCExpr::ExprKind MK = ME->getKind();
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index 939380a..9aa258c 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -30,6 +30,7 @@ class HexagonMCCodeEmitter : public MCCodeEmitter {
MCInstrInfo const &MCII;
std::unique_ptr<unsigned> Addend;
std::unique_ptr<bool> Extended;
+ std::unique_ptr<MCInst const *> CurrentBundle;
// helper routine for getMachineOpValue()
unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO,
@@ -39,12 +40,21 @@ class HexagonMCCodeEmitter : public MCCodeEmitter {
HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT);
+ // Return parse bits for instruction `MCI' inside bundle `MCB'
+ uint32_t parseBits(size_t Instruction, size_t Last, MCInst const &MCB,
+ MCInst const &MCI) const;
MCSubtargetInfo const &getSubtargetInfo() const;
void encodeInstruction(MCInst const &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const override;
+ void EncodeSingleInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI,
+ uint32_t Parse, size_t Index) const;
// \brief TableGen'erated function for getting the
// binary encoding for an instruction.
uint64_t getBinaryCodeForInstr(MCInst const &MI,
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
new file mode 100644
index 0000000..1080935
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -0,0 +1,420 @@
+//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file looks at a packet and tries to form compound instructions
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace Hexagon;
+#define DEBUG_TYPE "hexagon-mccompound"
+// Position of one compound-jump variant inside each of the 8-entry opcode
+// tables in this file. getCompoundOp selects the value: fp*/tp* for the
+// J2_jumpf*/J2_jumpt* opcodes, p0 when the predicate register is P0 and p1
+// otherwise, and *_t for the ...newpt opcodes versus *_nt for the ...new
+// ones.
+enum OpcodeIndex {
+ fp0_jump_nt = 0,
+ fp0_jump_t,
+ fp1_jump_nt,
+ fp1_jump_t,
+ tp0_jump_nt,
+ tp0_jump_t,
+ tp1_jump_nt,
+ tp1_jump_t
+// Compound opcode tables, one per kind of compare. Every table lists its
+// eight J4_* opcodes in OpcodeIndex order and is indexed with the value
+// returned by getCompoundOp (see getCompoundInsn).
+// NOTE(review): these arrays are mutable and have external linkage, yet
+// nothing visible in this file writes to them - consider `static const`
+// after confirming no other file refers to them.
+unsigned tstBitOpcode[8] = {J4_tstbit0_fp0_jump_nt, J4_tstbit0_fp0_jump_t,
+ J4_tstbit0_fp1_jump_nt, J4_tstbit0_fp1_jump_t,
+ J4_tstbit0_tp0_jump_nt, J4_tstbit0_tp0_jump_t,
+ J4_tstbit0_tp1_jump_nt, J4_tstbit0_tp1_jump_t};
+unsigned cmpeqBitOpcode[8] = {J4_cmpeq_fp0_jump_nt, J4_cmpeq_fp0_jump_t,
+ J4_cmpeq_fp1_jump_nt, J4_cmpeq_fp1_jump_t,
+ J4_cmpeq_tp0_jump_nt, J4_cmpeq_tp0_jump_t,
+ J4_cmpeq_tp1_jump_nt, J4_cmpeq_tp1_jump_t};
+unsigned cmpgtBitOpcode[8] = {J4_cmpgt_fp0_jump_nt, J4_cmpgt_fp0_jump_t,
+ J4_cmpgt_fp1_jump_nt, J4_cmpgt_fp1_jump_t,
+ J4_cmpgt_tp0_jump_nt, J4_cmpgt_tp0_jump_t,
+ J4_cmpgt_tp1_jump_nt, J4_cmpgt_tp1_jump_t};
+unsigned cmpgtuBitOpcode[8] = {J4_cmpgtu_fp0_jump_nt, J4_cmpgtu_fp0_jump_t,
+ J4_cmpgtu_fp1_jump_nt, J4_cmpgtu_fp1_jump_t,
+ J4_cmpgtu_tp0_jump_nt, J4_cmpgtu_tp0_jump_t,
+ J4_cmpgtu_tp1_jump_nt, J4_cmpgtu_tp1_jump_t};
+unsigned cmpeqiBitOpcode[8] = {J4_cmpeqi_fp0_jump_nt, J4_cmpeqi_fp0_jump_t,
+ J4_cmpeqi_fp1_jump_nt, J4_cmpeqi_fp1_jump_t,
+ J4_cmpeqi_tp0_jump_nt, J4_cmpeqi_tp0_jump_t,
+ J4_cmpeqi_tp1_jump_nt, J4_cmpeqi_tp1_jump_t};
+unsigned cmpgtiBitOpcode[8] = {J4_cmpgti_fp0_jump_nt, J4_cmpgti_fp0_jump_t,
+ J4_cmpgti_fp1_jump_nt, J4_cmpgti_fp1_jump_t,
+ J4_cmpgti_tp0_jump_nt, J4_cmpgti_tp0_jump_t,
+ J4_cmpgti_tp1_jump_nt, J4_cmpgti_tp1_jump_t};
+unsigned cmpgtuiBitOpcode[8] = {J4_cmpgtui_fp0_jump_nt, J4_cmpgtui_fp0_jump_t,
+ J4_cmpgtui_fp1_jump_nt, J4_cmpgtui_fp1_jump_t,
+ J4_cmpgtui_tp0_jump_nt, J4_cmpgtui_tp0_jump_t,
+ J4_cmpgtui_tp1_jump_nt, J4_cmpgtui_tp1_jump_t};
+unsigned cmpeqn1BitOpcode[8] = {J4_cmpeqn1_fp0_jump_nt, J4_cmpeqn1_fp0_jump_t,
+ J4_cmpeqn1_fp1_jump_nt, J4_cmpeqn1_fp1_jump_t,
+ J4_cmpeqn1_tp0_jump_nt, J4_cmpeqn1_tp0_jump_t,
+ J4_cmpeqn1_tp1_jump_nt, J4_cmpeqn1_tp1_jump_t};
+unsigned cmpgtn1BitOpcode[8] = {
+ J4_cmpgtn1_fp0_jump_nt, J4_cmpgtn1_fp0_jump_t, J4_cmpgtn1_fp1_jump_nt,
+ J4_cmpgtn1_fp1_jump_t, J4_cmpgtn1_tp0_jump_nt, J4_cmpgtn1_tp0_jump_t,
+ J4_cmpgtn1_tp1_jump_nt, J4_cmpgtn1_tp1_jump_t,
+// enum HexagonII::CompoundGroup
+namespace {
+/// Classify \p MI as a candidate for compound-instruction formation.
+///
+/// \param MI         instruction to classify.
+/// \param IsExtended supplied by the caller; when true, the compare,
+///                   transfer and test-bit forms are rejected.
+/// \returns HexagonII::HCG_A for an accepted compare / transfer / test-bit,
+///          HexagonII::HCG_B for a .new-predicated jump on P0 or P1,
+///          HexagonII::HCG_C for J2_jump, and HexagonII::HCG_None for
+///          everything else. Every "not a candidate" path returns HCG_None
+///          explicitly rather than relying on `false` converting to it.
+unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
+  unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
+
+  switch (MI.getOpcode()) {
+  default:
+    return HexagonII::HCG_None;
+  //
+  // Compound pairs.
+  // "p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2"
+  // "Rd16=#U6 ; jump #r9:2"
+  // "Rd16=Rs16 ; jump #r9:2"
+  //
+  case Hexagon::C2_cmpeq:
+  case Hexagon::C2_cmpgt:
+  case Hexagon::C2_cmpgtu:
+    if (IsExtended)
+      return HexagonII::HCG_None;
+    DstReg = MI.getOperand(0).getReg();
+    Src1Reg = MI.getOperand(1).getReg();
+    Src2Reg = MI.getOperand(2).getReg();
+    if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+        HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+        HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg))
+      return HexagonII::HCG_A;
+    break;
+  case Hexagon::C2_cmpeqi:
+  case Hexagon::C2_cmpgti:
+  case Hexagon::C2_cmpgtui:
+    if (IsExtended)
+      return HexagonII::HCG_None;
+    // P0 = cmp.eq(Rs,#u2)
+    // The immediate must fit in five unsigned bits or be exactly -1.
+    DstReg = MI.getOperand(0).getReg();
+    SrcReg = MI.getOperand(1).getReg();
+    if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+        HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+        MI.getOperand(2).isImm() && ((isUInt<5>(MI.getOperand(2).getImm())) ||
+                                     (MI.getOperand(2).getImm() == -1)))
+      return HexagonII::HCG_A;
+    break;
+  case Hexagon::A2_tfr:
+    if (IsExtended)
+      return HexagonII::HCG_None;
+    // Rd = Rs
+    DstReg = MI.getOperand(0).getReg();
+    SrcReg = MI.getOperand(1).getReg();
+    if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+        HexagonMCInstrInfo::isIntRegForSubInst(SrcReg))
+      return HexagonII::HCG_A;
+    break;
+  case Hexagon::A2_tfrsi:
+    if (IsExtended)
+      return HexagonII::HCG_None;
+    // Rd = #u6
+    DstReg = MI.getOperand(0).getReg();
+    if (MI.getOperand(1).isImm() && isUInt<6>(MI.getOperand(1).getImm()) &&
+        HexagonMCInstrInfo::isIntRegForSubInst(DstReg))
+      return HexagonII::HCG_A;
+    break;
+  case Hexagon::S2_tstbit_i:
+    if (IsExtended)
+      return HexagonII::HCG_None;
+    // Only a test of bit 0 is accepted.
+    DstReg = MI.getOperand(0).getReg();
+    Src1Reg = MI.getOperand(1).getReg();
+    if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+        MI.getOperand(2).isImm() &&
+        HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+        (MI.getOperand(2).getImm() == 0))
+      return HexagonII::HCG_A;
+    break;
+  // The fact that .new form is used pretty much guarantees
+  // that predicate register will match. Nevertheless,
+  // there could be some false positives without additional
+  // checking.
+  case Hexagon::J2_jumptnew:
+  case Hexagon::J2_jumpfnew:
+  case Hexagon::J2_jumptnewpt:
+  case Hexagon::J2_jumpfnewpt:
+    Src1Reg = MI.getOperand(0).getReg();
+    if (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg)
+      return HexagonII::HCG_B;
+    break;
+  // Transfer and jump:
+  // Rd=#U6 ; jump #r9:2
+  // Rd=Rs ; jump #r9:2
+  // Do not test for jump range here.
+  case Hexagon::J2_jump:
+    return HexagonII::HCG_C;
+  }
+
+  return HexagonII::HCG_None;
+}
+} // end anonymous namespace
+/// getCompoundOp - Map the predicated .new jump \p HMCI to its OpcodeIndex
+/// (0-7), the position used to index the compound opcode tables.
+/// Operand 0 of \p HMCI is read as the predicate register; any register
+/// other than P0 selects the p1 variant.
+namespace {
+unsigned getCompoundOp(MCInst const &HMCI) {
+  unsigned const PredReg = HMCI.getOperand(0).getReg();
+
+  assert((PredReg == Hexagon::P0) || (PredReg == Hexagon::P1) ||
+         (PredReg == Hexagon::P2) || (PredReg == Hexagon::P3));
+
+  bool const UsesP0 = PredReg == Hexagon::P0;
+  switch (HMCI.getOpcode()) {
+  case Hexagon::J2_jumpfnew:
+    return UsesP0 ? fp0_jump_nt : fp1_jump_nt;
+  case Hexagon::J2_jumpfnewpt:
+    return UsesP0 ? fp0_jump_t : fp1_jump_t;
+  case Hexagon::J2_jumptnew:
+    return UsesP0 ? tp0_jump_nt : tp1_jump_nt;
+  case Hexagon::J2_jumptnewpt:
+    return UsesP0 ? tp0_jump_t : tp1_jump_t;
+  default:
+    // Callers are expected to pass only the four opcodes handled above.
+    llvm_unreachable("Expected match not found.\n");
+  }
+}
+} // end anonymous namespace
+namespace {
+/// Build the compound instruction that replaces the pair (\p L, \p R).
+///
+/// \p L is the transfer / compare / test-bit half and \p R the jump half.
+/// The switch only decides the compound opcode, which operands of \p L are
+/// copied (in order), and which operand of \p R holds the jump target. The
+/// instruction itself is then allocated from \p Context and filled in one
+/// place.
+///
+/// \returns the new instruction, or null when the opcode of \p L is not
+///          handled.
+MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
+  unsigned CompoundOpcode = 0;
+  // Operand indices of L to copy. Compares use (1, 2): the two sources.
+  unsigned SrcIdx[2] = {1, 2};
+  unsigned NumSrc = 2;
+  // Index of the jump target in R. Predicated jumps carry the predicate in
+  // operand 0, so their target is operand 1.
+  unsigned TargetIdx = 1;
+
+  switch (L.getOpcode()) {
+  default:
+    DEBUG(dbgs() << "Possible compound ignored\n");
+    return nullptr;
+
+  case Hexagon::A2_tfrsi:
+    // Destination register, immediate, then the target of the plain jump.
+    CompoundOpcode = J4_jumpseti;
+    SrcIdx[0] = 0;
+    SrcIdx[1] = 1;
+    TargetIdx = 0;
+    break;
+
+  case Hexagon::A2_tfr:
+    // Destination register, source register, then the jump target.
+    CompoundOpcode = J4_jumpsetr;
+    SrcIdx[0] = 0;
+    SrcIdx[1] = 1;
+    TargetIdx = 0;
+    break;
+
+  case Hexagon::C2_cmpeq:
+    DEBUG(dbgs() << "CX: C2_cmpeq\n");
+    CompoundOpcode = cmpeqBitOpcode[getCompoundOp(R)];
+    break;
+
+  case Hexagon::C2_cmpgt:
+    DEBUG(dbgs() << "CX: C2_cmpgt\n");
+    CompoundOpcode = cmpgtBitOpcode[getCompoundOp(R)];
+    break;
+
+  case Hexagon::C2_cmpgtu:
+    DEBUG(dbgs() << "CX: C2_cmpgtu\n");
+    CompoundOpcode = cmpgtuBitOpcode[getCompoundOp(R)];
+    break;
+
+  case Hexagon::C2_cmpeqi: {
+    DEBUG(dbgs() << "CX: C2_cmpeqi\n");
+    // Comparing against -1 has its own opcode with no immediate operand.
+    bool const AgainstMinusOne = L.getOperand(2).getImm() == -1;
+    unsigned const *Table =
+        AgainstMinusOne ? cmpeqn1BitOpcode : cmpeqiBitOpcode;
+    CompoundOpcode = Table[getCompoundOp(R)];
+    NumSrc = AgainstMinusOne ? 1 : 2;
+    break;
+  }
+
+  case Hexagon::C2_cmpgti: {
+    DEBUG(dbgs() << "CX: C2_cmpgti\n");
+    // Same split as for C2_cmpeqi.
+    bool const AgainstMinusOne = L.getOperand(2).getImm() == -1;
+    unsigned const *Table =
+        AgainstMinusOne ? cmpgtn1BitOpcode : cmpgtiBitOpcode;
+    CompoundOpcode = Table[getCompoundOp(R)];
+    NumSrc = AgainstMinusOne ? 1 : 2;
+    break;
+  }
+
+  case Hexagon::C2_cmpgtui:
+    DEBUG(dbgs() << "CX: C2_cmpgtui\n");
+    CompoundOpcode = cmpgtuiBitOpcode[getCompoundOp(R)];
+    break;
+
+  case Hexagon::S2_tstbit_i:
+    DEBUG(dbgs() << "CX: S2_tstbit_i\n");
+    // Only the source register is carried over; the bit number is dropped.
+    CompoundOpcode = tstBitOpcode[getCompoundOp(R)];
+    NumSrc = 1;
+    break;
+  }
+
+  MCInst *CompoundInsn = new (Context) MCInst;
+  CompoundInsn->setOpcode(CompoundOpcode);
+  for (unsigned I = 0; I != NumSrc; ++I)
+    CompoundInsn->addOperand(L.getOperand(SrcIdx[I]));
+  CompoundInsn->addOperand(R.getOperand(TargetIdx)); // Jump target.
+  return CompoundInsn;
+}
+} // end anonymous namespace
+/// Non-symmetrical check: can \p MIa (first) and \p MIb (second) be fused
+/// into one compound instruction?
+///
+/// Two shapes qualify. A register or immediate transfer followed by a
+/// group-C instruction always does. A group-A instruction followed by a
+/// group-B instruction does when operand 0 of both is the same register.
+namespace {
+bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
+                           MCInst const &MIb, bool IsExtendedB) {
+  unsigned const GroupA = getCompoundCandidateGroup(MIa, IsExtendedA);
+  unsigned const GroupB = getCompoundCandidateGroup(MIb, IsExtendedB);
+
+  // The first instruction must be a group-A candidate in either shape.
+  if (GroupA != HexagonII::HCG_A)
+    return false;
+
+  unsigned const OpcodeA = MIa.getOpcode();
+  bool const IsTransfer =
+      OpcodeA == Hexagon::A2_tfr || OpcodeA == Hexagon::A2_tfrsi;
+  if (GroupB == HexagonII::HCG_C && IsTransfer)
+    return true;
+
+  // We have two candidates - check that this is the same register
+  // we are talking about.
+  return GroupB == HexagonII::HCG_B &&
+         MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg();
+}
+} // end anonymous namespace
+namespace {
+/// Search bundle \p MCI for one pair of instructions that can be fused.
+///
+/// Every TypeJ instruction is tried against every other instruction of the
+/// bundle. On the first successful match the jump's operand is replaced by
+/// the compound instruction (allocated from \p Context), the partner is
+/// erased from the bundle, and the function returns at once.
+///
+/// \returns true when a compound was formed, false when no pair matched.
+bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
+  assert(HexagonMCInstrInfo::isBundle(MCI));
+
+  // The bundle is only modified right before returning, so the position of
+  // its first instruction can be computed once.
+  MCInst::iterator const FirstInst =
+      MCI.begin() + HexagonMCInstrInfo::bundleInstructionsOffset;
+
+  // True when the instruction visited just before the current one was an
+  // immediate extender.
+  bool JExtended = false;
+  for (MCInst::iterator J = FirstInst; J != MCI.end(); ++J) {
+    MCInst const *JumpInst = J->getInst();
+    bool const JumpIsImmext = HexagonMCInstrInfo::isImmext(*JumpInst);
+
+    if (!JumpIsImmext &&
+        llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) ==
+            HexagonII::TypeJ) {
+      // Try to pair with another insn (B)undled with jump.
+      bool BExtended = false;
+      for (MCInst::iterator B = FirstInst; B != MCI.end(); ++B) {
+        MCInst const *Inst = B->getInst();
+        // Skipping the jump itself leaves BExtended as it was.
+        // NOTE(review): an extender in front of the jump therefore also
+        // marks the instruction after the jump as extended - confirm that
+        // this is intended.
+        if (Inst == JumpInst)
+          continue;
+
+        bool const InstIsImmext = HexagonMCInstrInfo::isImmext(*Inst);
+        if (!InstIsImmext) {
+          DEBUG(dbgs() << "J,B: " << JumpInst->getOpcode() << ","
+                       << Inst->getOpcode() << "\n");
+          if (isOrderedCompoundPair(*Inst, BExtended, *JumpInst, JExtended)) {
+            if (MCInst *CompoundInsn =
+                    getCompoundInsn(Context, *Inst, *JumpInst)) {
+              DEBUG(dbgs() << "B: " << Inst->getOpcode() << ","
+                           << JumpInst->getOpcode() << " Compounds to "
+                           << CompoundInsn->getOpcode() << "\n");
+              J->setInst(CompoundInsn);
+              MCI.erase(B);
+              return true;
+            }
+          }
+        }
+        BExtended = InstIsImmext;
+      }
+    }
+    JExtended = JumpIsImmext;
+  }
+  return false;
+}
+} // end anonymous namespace
+/// tryCompound - Given a bundle, check it for instruction pairs that can be
+/// fused and update the contents of the bundle with the compound
+/// instructions that are found.
+/// Every compound formed replaces two operands of the bundle by one, which
+/// frees a slot. The search is repeated until no further pair is found.
+void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
+                                     MCContext &Context, MCInst &MCI) {
+  assert(MCI.getOpcode() == Hexagon::BUNDLE &&
+         "Non-Bundle where Bundle expected");
+
+  // By definition a compound must have 2 insn.
+  // NOTE(review): size() counts every operand of the bundle; confirm that
+  // the threshold accounts for any leading non-instruction operand.
+  if (MCI.size() >= 2) {
+    // Each successful pass changes the bundle, so keep looking until a
+    // pass finds nothing.
+    while (lookForCompound(MCII, Context, MCI)) {
+    }
+  }
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
new file mode 100644
index 0000000..eb62977
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -0,0 +1,1100 @@
+//===----- HexagonMCDuplexInfo.cpp - Instruction bundle checking ----------===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This implements duplexing of instructions to reduce code size
+#include "HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+using namespace llvm;
+using namespace Hexagon;
+#define DEBUG_TYPE "hexagon-mcduplex-info"
+// Table pairing each duplex sub-instruction opcode with an unsigned value; isOrderedDuplexPair compares these values to order two sub-instructions of the same group.
+static std::pair<unsigned, unsigned> opcodeData[] = {
+ std::make_pair((unsigned)V4_SA1_addi, 0),
+ std::make_pair((unsigned)V4_SA1_addrx, 6144),
+ std::make_pair((unsigned)V4_SA1_addsp, 3072),
+ std::make_pair((unsigned)V4_SA1_and1, 4608),
+ std::make_pair((unsigned)V4_SA1_clrf, 6768),
+ std::make_pair((unsigned)V4_SA1_clrfnew, 6736),
+ std::make_pair((unsigned)V4_SA1_clrt, 6752),
+ std::make_pair((unsigned)V4_SA1_clrtnew, 6720),
+ std::make_pair((unsigned)V4_SA1_cmpeqi, 6400),
+ std::make_pair((unsigned)V4_SA1_combine0i, 7168),
+ std::make_pair((unsigned)V4_SA1_combine1i, 7176),
+ std::make_pair((unsigned)V4_SA1_combine2i, 7184),
+ std::make_pair((unsigned)V4_SA1_combine3i, 7192),
+ std::make_pair((unsigned)V4_SA1_combinerz, 7432),
+ std::make_pair((unsigned)V4_SA1_combinezr, 7424),
+ std::make_pair((unsigned)V4_SA1_dec, 4864),
+ std::make_pair((unsigned)V4_SA1_inc, 4352),
+ std::make_pair((unsigned)V4_SA1_seti, 2048),
+ std::make_pair((unsigned)V4_SA1_setin1, 6656),
+ std::make_pair((unsigned)V4_SA1_sxtb, 5376),
+ std::make_pair((unsigned)V4_SA1_sxth, 5120),
+ std::make_pair((unsigned)V4_SA1_tfr, 4096),
+ std::make_pair((unsigned)V4_SA1_zxtb, 5888),
+ std::make_pair((unsigned)V4_SA1_zxth, 5632),
+ std::make_pair((unsigned)V4_SL1_loadri_io, 0),
+ std::make_pair((unsigned)V4_SL1_loadrub_io, 4096),
+ std::make_pair((unsigned)V4_SL2_deallocframe, 7936),
+ std::make_pair((unsigned)V4_SL2_jumpr31, 8128),
+ std::make_pair((unsigned)V4_SL2_jumpr31_f, 8133),
+ std::make_pair((unsigned)V4_SL2_jumpr31_fnew, 8135),
+ std::make_pair((unsigned)V4_SL2_jumpr31_t, 8132),
+ std::make_pair((unsigned)V4_SL2_jumpr31_tnew, 8134),
+ std::make_pair((unsigned)V4_SL2_loadrb_io, 4096),
+ std::make_pair((unsigned)V4_SL2_loadrd_sp, 7680),
+ std::make_pair((unsigned)V4_SL2_loadrh_io, 0),
+ std::make_pair((unsigned)V4_SL2_loadri_sp, 7168),
+ std::make_pair((unsigned)V4_SL2_loadruh_io, 2048),
+ std::make_pair((unsigned)V4_SL2_return, 8000),
+ std::make_pair((unsigned)V4_SL2_return_f, 8005),
+ std::make_pair((unsigned)V4_SL2_return_fnew, 8007),
+ std::make_pair((unsigned)V4_SL2_return_t, 8004),
+ std::make_pair((unsigned)V4_SL2_return_tnew, 8006),
+ std::make_pair((unsigned)V4_SS1_storeb_io, 4096),
+ std::make_pair((unsigned)V4_SS1_storew_io, 0),
+ std::make_pair((unsigned)V4_SS2_allocframe, 7168),
+ std::make_pair((unsigned)V4_SS2_storebi0, 4608),
+ std::make_pair((unsigned)V4_SS2_storebi1, 4864),
+ std::make_pair((unsigned)V4_SS2_stored_sp, 2560),
+ std::make_pair((unsigned)V4_SS2_storeh_io, 0),
+ std::make_pair((unsigned)V4_SS2_storew_sp, 2048),
+ std::make_pair((unsigned)V4_SS2_storewi0, 4096),
+ std::make_pair((unsigned)V4_SS2_storewi1, 4352)};
+static std::map<unsigned, unsigned> // sub-instruction opcode -> table value, built once from opcodeData
+ subinstOpcodeMap(opcodeData,
+ opcodeData + sizeof(opcodeData) / sizeof(opcodeData[0])); // end pointer = begin + element count
+/// Ordered group compatibility test: returns true when a sub-instruction of
+/// group Ga may be paired with one of group Gb.
+bool HexagonMCInstrInfo::isDuplexPairMatch(unsigned Ga, unsigned Gb) {
+ // The set of groups accepted for Gb widens step by step: L1 accepts
+ // {L1, A}, L2 additionally accepts L2, S1 additionally S1, S2 additionally
+ // S2. Build those sets incrementally.
+ bool const AcceptedByL1 =
+ (Gb == HexagonII::HSIG_L1) || (Gb == HexagonII::HSIG_A);
+ bool const AcceptedByL2 = AcceptedByL1 || (Gb == HexagonII::HSIG_L2);
+ bool const AcceptedByS1 = AcceptedByL2 || (Gb == HexagonII::HSIG_S1);
+ bool const AcceptedByS2 = AcceptedByS1 || (Gb == HexagonII::HSIG_S2);
+ if (Ga == HexagonII::HSIG_L1)
+ return AcceptedByL1;
+ if (Ga == HexagonII::HSIG_L2)
+ return AcceptedByL2;
+ if (Ga == HexagonII::HSIG_S1)
+ return AcceptedByS1;
+ if (Ga == HexagonII::HSIG_S2)
+ return AcceptedByS2;
+ // Groups A and Compound only pair with themselves.
+ if (Ga == HexagonII::HSIG_A)
+ return (Gb == HexagonII::HSIG_A);
+ if (Ga == HexagonII::HSIG_Compound)
+ return (Gb == HexagonII::HSIG_Compound);
+ // HSIG_None and any unrecognised group never pair.
+ return false;
+/// Map an ordered pair of duplex sub-instruction groups (Ga, Gb) to the
+/// duplex instruction class number. Any pair that is not listed in the
+/// tables below yields 0xFFFFFFFF.
+unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
+ switch (Ga) {
+ case HexagonII::HSIG_None:
+ default:
+ break;
+ case HexagonII::HSIG_L1:
+ switch (Gb) {
+ default:
+ break;
+ case HexagonII::HSIG_L1:
+ return 0;
+ case HexagonII::HSIG_A:
+ return 0x4;
+ }
+ // Leave the outer switch. Without this an unmatched Gb would fall through
+ // into the next group's table and could return that group's class.
+ break;
+ case HexagonII::HSIG_L2:
+ switch (Gb) {
+ default:
+ break;
+ case HexagonII::HSIG_L1:
+ return 0x1;
+ case HexagonII::HSIG_L2:
+ return 0x2;
+ case HexagonII::HSIG_A:
+ return 0x5;
+ }
+ break;
+ case HexagonII::HSIG_S1:
+ switch (Gb) {
+ default:
+ break;
+ case HexagonII::HSIG_L1:
+ return 0x8;
+ case HexagonII::HSIG_L2:
+ return 0x9;
+ case HexagonII::HSIG_S1:
+ return 0xA;
+ case HexagonII::HSIG_A:
+ return 0x6;
+ }
+ break;
+ case HexagonII::HSIG_S2:
+ switch (Gb) {
+ default:
+ break;
+ case HexagonII::HSIG_L1:
+ return 0xC;
+ case HexagonII::HSIG_L2:
+ return 0xD;
+ case HexagonII::HSIG_S1:
+ return 0xB;
+ case HexagonII::HSIG_S2:
+ return 0xE;
+ case HexagonII::HSIG_A:
+ return 0x7;
+ }
+ break;
+ case HexagonII::HSIG_A:
+ switch (Gb) {
+ default:
+ break;
+ case HexagonII::HSIG_A:
+ return 0x3;
+ }
+ break;
+ case HexagonII::HSIG_Compound:
+ switch (Gb) {
+ default:
+ break;
+ case HexagonII::HSIG_Compound:
+ return 0xFFFFFFFF;
+ }
+ break;
+ }
+ // No class exists for this (Ga, Gb) combination.
+ return 0xFFFFFFFF;
+unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { // Classify MCI into a duplex sub-instruction group (HexagonII::HSIG_*); HSIG_None when it does not qualify.
+ unsigned DstReg, PredReg, SrcReg, Src1Reg, Src2Reg;
+ switch (MCI.getOpcode()) {
+ default:
+ return HexagonII::HSIG_None;
+ //
+ // Group L1:
+ //
+ // Rd = memw(Rs+#u4:2)
+ // Rd = memub(Rs+#u4:0)
+ case Hexagon::L2_loadri_io:
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ // Special case this one from Group L2.
+ // Rd = memw(r29+#u5:2)
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
+ if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg &&
+ MCI.getOperand(2).isImm() &&
+ isShiftedUInt<5, 2>(MCI.getOperand(2).getImm())) {
+ return HexagonII::HSIG_L2;
+ }
+ // Rd = memw(Rs+#u4:2)
+ if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ (MCI.getOperand(2).isImm() &&
+ isShiftedUInt<4, 2>(MCI.getOperand(2).getImm()))) {
+ return HexagonII::HSIG_L1;
+ }
+ }
+ break;
+ case Hexagon::L2_loadrub_io:
+ // Rd = memub(Rs+#u4:0)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ MCI.getOperand(2).isImm() && isUInt<4>(MCI.getOperand(2).getImm())) {
+ return HexagonII::HSIG_L1;
+ }
+ break;
+ //
+ // Group L2:
+ //
+ // Rd = memh/memuh(Rs+#u3:1)
+ // Rd = memb(Rs+#u3:0)
+ // Rd = memw(r29+#u5:2) - Handled above.
+ // Rdd = memd(r29+#u5:3)
+ // deallocframe
+ // [if ([!]p0[.new])] dealloc_return
+ // [if ([!]p0[.new])] jumpr r31
+ case Hexagon::L2_loadrh_io:
+ case Hexagon::L2_loadruh_io:
+ // Rd = memh/memuh(Rs+#u3:1)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ MCI.getOperand(2).isImm() &&
+ isShiftedUInt<3, 1>(MCI.getOperand(2).getImm())) {
+ return HexagonII::HSIG_L2;
+ }
+ break;
+ case Hexagon::L2_loadrb_io:
+ // Rd = memb(Rs+#u3:0)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ MCI.getOperand(2).isImm() && isUInt<3>(MCI.getOperand(2).getImm())) {
+ return HexagonII::HSIG_L2;
+ }
+ break;
+ case Hexagon::L2_loadrd_io:
+ // Rdd = memd(r29+#u5:3)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg &&
+ MCI.getOperand(2).isImm() &&
+ isShiftedUInt<5, 3>(MCI.getOperand(2).getImm())) {
+ return HexagonII::HSIG_L2;
+ }
+ break;
+ case Hexagon::L4_return:
+ case Hexagon::L2_deallocframe:
+ return HexagonII::HSIG_L2; // accepted unconditionally, no operand checks
+ case Hexagon::EH_RETURN_JMPR:
+ case Hexagon::J2_jumpr:
+ case Hexagon::JMPret:
+ // jumpr r31
+ // Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>.
+ DstReg = MCI.getOperand(0).getReg();
+ if (Hexagon::R31 == DstReg) {
+ return HexagonII::HSIG_L2;
+ }
+ break;
+ case Hexagon::J2_jumprt:
+ case Hexagon::J2_jumprf:
+ case Hexagon::J2_jumprtnew:
+ case Hexagon::J2_jumprfnew:
+ case Hexagon::JMPrett:
+ case Hexagon::JMPretf:
+ case Hexagon::JMPrettnew:
+ case Hexagon::JMPretfnew:
+ case Hexagon::JMPrettnewpt:
+ case Hexagon::JMPretfnewpt:
+ DstReg = MCI.getOperand(1).getReg(); // jump target register (operand 1)
+ SrcReg = MCI.getOperand(0).getReg(); // predicate register (operand 0)
+ // [if ([!]p0[.new])] jumpr r31
+ if ((HexagonMCInstrInfo::isPredReg(SrcReg) && (Hexagon::P0 == SrcReg)) &&
+ (Hexagon::R31 == DstReg)) {
+ return HexagonII::HSIG_L2;
+ }
+ break;
+ case Hexagon::L4_return_t:
+ case Hexagon::L4_return_f:
+ case Hexagon::L4_return_tnew_pnt:
+ case Hexagon::L4_return_fnew_pnt:
+ case Hexagon::L4_return_tnew_pt:
+ case Hexagon::L4_return_fnew_pt:
+ // [if ([!]p0[.new])] dealloc_return
+ SrcReg = MCI.getOperand(0).getReg();
+ if (Hexagon::P0 == SrcReg) {
+ return HexagonII::HSIG_L2;
+ }
+ break;
+ //
+ // Group S1:
+ //
+ // memw(Rs+#u4:2) = Rt
+ // memb(Rs+#u4:0) = Rt
+ case Hexagon::S2_storeri_io:
+ // Special case this one from Group S2.
+ // memw(r29+#u5:2) = Rt
+ Src1Reg = MCI.getOperand(0).getReg();
+ Src2Reg = MCI.getOperand(2).getReg();
+ if (HexagonMCInstrInfo::isIntReg(Src1Reg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
+ Hexagon::R29 == Src1Reg && MCI.getOperand(1).isImm() &&
+ isShiftedUInt<5, 2>(MCI.getOperand(1).getImm())) {
+ return HexagonII::HSIG_S2;
+ }
+ // memw(Rs+#u4:2) = Rt
+ if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
+ MCI.getOperand(1).isImm() &&
+ isShiftedUInt<4, 2>(MCI.getOperand(1).getImm())) {
+ return HexagonII::HSIG_S1;
+ }
+ break;
+ case Hexagon::S2_storerb_io:
+ // memb(Rs+#u4:0) = Rt
+ Src1Reg = MCI.getOperand(0).getReg();
+ Src2Reg = MCI.getOperand(2).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
+ MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm())) {
+ return HexagonII::HSIG_S1;
+ }
+ break;
+ //
+ // Group S2:
+ //
+ // memh(Rs+#u3:1) = Rt
+ // memw(r29+#u5:2) = Rt
+ // memd(r29+#s6:3) = Rtt
+ // memw(Rs+#u4:2) = #U1
+ // memb(Rs+#u4) = #U1
+ // allocframe(#u5:3)
+ case Hexagon::S2_storerh_io:
+ // memh(Rs+#u3:1) = Rt
+ Src1Reg = MCI.getOperand(0).getReg();
+ Src2Reg = MCI.getOperand(2).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
+ MCI.getOperand(1).isImm() &&
+ isShiftedUInt<3, 1>(MCI.getOperand(1).getImm())) {
+ return HexagonII::HSIG_S2;
+ }
+ break;
+ case Hexagon::S2_storerd_io:
+ // memd(r29+#s6:3) = Rtt
+ Src1Reg = MCI.getOperand(0).getReg();
+ Src2Reg = MCI.getOperand(2).getReg();
+ if (HexagonMCInstrInfo::isDblRegForSubInst(Src2Reg) &&
+ HexagonMCInstrInfo::isIntReg(Src1Reg) && Hexagon::R29 == Src1Reg &&
+ MCI.getOperand(1).isImm() &&
+ isShiftedInt<6, 3>(MCI.getOperand(1).getImm())) {
+ return HexagonII::HSIG_S2;
+ }
+ break;
+ case Hexagon::S4_storeiri_io:
+ // memw(Rs+#u4:2) = #U1
+ Src1Reg = MCI.getOperand(0).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+ MCI.getOperand(1).isImm() &&
+ isShiftedUInt<4, 2>(MCI.getOperand(1).getImm()) &&
+ MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) {
+ return HexagonII::HSIG_S2;
+ }
+ break;
+ case Hexagon::S4_storeirb_io:
+ // memb(Rs+#u4) = #U1
+ Src1Reg = MCI.getOperand(0).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+ MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm()) &&
+ MCI.getOperand(2).isImm() && MCI.getOperand(2).isImm() && // NOTE(review): isImm() on operand 2 is tested twice; the second test is redundant
+ isUInt<1>(MCI.getOperand(2).getImm())) {
+ return HexagonII::HSIG_S2;
+ }
+ break;
+ case Hexagon::S2_allocframe:
+ if (MCI.getOperand(0).isImm() &&
+ isShiftedUInt<5, 3>(MCI.getOperand(0).getImm())) {
+ return HexagonII::HSIG_S2;
+ }
+ break;
+ //
+ // Group A:
+ //
+ // Rx = add(Rx,#s7)
+ // Rd = Rs
+ // Rd = #u6
+ // Rd = #-1
+ // if ([!]P0[.new]) Rd = #0
+ // Rd = add(r29,#u6:2)
+ // Rx = add(Rx,Rs)
+ // P0 = cmp.eq(Rs,#u2)
+ // Rdd = combine(#0,Rs)
+ // Rdd = combine(Rs,#0)
+ // Rdd = combine(#u2,#U2)
+ // Rd = add(Rs,#1)
+ // Rd = add(Rs,#-1)
+ // Rd = sxth/sxtb/zxtb/zxth(Rs)
+ // Rd = and(Rs,#1)
+ case Hexagon::A2_addi:
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
+ // Rd = add(r29,#u6:2)
+ if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg &&
+ MCI.getOperand(2).isImm() &&
+ isShiftedUInt<6, 2>(MCI.getOperand(2).getImm())) {
+ return HexagonII::HSIG_A;
+ }
+ // Rx = add(Rx,#s7)
+ if (DstReg == SrcReg) { // the immediate's range is not checked here
+ return HexagonII::HSIG_A;
+ }
+ // Rd = add(Rs,#1)
+ // Rd = add(Rs,#-1)
+ if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) ||
+ (MCI.getOperand(2).getImm() == -1))) {
+ return HexagonII::HSIG_A;
+ }
+ }
+ break;
+ case Hexagon::A2_add:
+ // Rx = add(Rx,Rs)
+ DstReg = MCI.getOperand(0).getReg();
+ Src1Reg = MCI.getOperand(1).getReg();
+ Src2Reg = MCI.getOperand(2).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg)) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A2_andir: // Rd = and(Rs,#1) or Rd = and(Rs,#255)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) ||
+ (MCI.getOperand(2).getImm() == 255))) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A2_tfr:
+ // Rd = Rs
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A2_tfrsi: // Rd = #imm; only the destination register is checked here, not the immediate
+ DstReg = MCI.getOperand(0).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::C2_cmoveit:
+ case Hexagon::C2_cmovenewit:
+ case Hexagon::C2_cmoveif:
+ case Hexagon::C2_cmovenewif:
+ // if ([!]P0[.new]) Rd = #0
+ // Actual form:
+ // %R16<def> = C2_cmovenewit %P0<internal>, 0, %R16<imp-use,undef>;
+ DstReg = MCI.getOperand(0).getReg(); // Rd
+ PredReg = MCI.getOperand(1).getReg(); // P0
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ Hexagon::P0 == PredReg && MCI.getOperand(2).isImm() &&
+ MCI.getOperand(2).getImm() == 0) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::C2_cmpeqi:
+ // P0 = cmp.eq(Rs,#u2)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (Hexagon::P0 == DstReg &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ MCI.getOperand(2).isImm() && isUInt<2>(MCI.getOperand(2).getImm())) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ // Rdd = combine(#u2,#U2)
+ DstReg = MCI.getOperand(0).getReg();
+ if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
+ // TODO: Handle Globals/Symbols
+ (MCI.getOperand(1).isImm() && isUInt<2>(MCI.getOperand(1).getImm())) &&
+ ((MCI.getOperand(2).isImm() &&
+ isUInt<2>(MCI.getOperand(2).getImm())))) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A4_combineri:
+ // Rdd = combine(Rs,#0)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ (MCI.getOperand(2).isImm() && MCI.getOperand(2).getImm() == 0)) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A4_combineir:
+ // Rdd = combine(#0,Rs)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(2).getReg();
+ if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ (MCI.getOperand(1).isImm() && MCI.getOperand(1).getImm() == 0)) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A2_sxtb:
+ case Hexagon::A2_sxth:
+ case Hexagon::A2_zxtb:
+ case Hexagon::A2_zxth:
+ // Rd = sxth/sxtb/zxtb/zxth(Rs)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ }
+ return HexagonII::HSIG_None; // a known opcode whose operands did not satisfy any sub-instruction form
+/// Report whether turning potentialDuplex into a sub-instruction would need a
+/// constant extender: its immediate is an expression, or a value outside the
+/// range checked below. Only A2_addi and A2_tfrsi are examined; every other
+/// opcode yields false.
+bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) {
+ unsigned const Opc = potentialDuplex.getOpcode();
+ if (Opc == Hexagon::A2_addi) {
+ // Only the Rx = add(Rx,#s7) form is of interest.
+ unsigned const Rd = potentialDuplex.getOperand(0).getReg();
+ unsigned const Rs = potentialDuplex.getOperand(1).getReg();
+ if (Rd != Rs || !HexagonMCInstrInfo::isIntRegForSubInst(Rd))
+ return false;
+ MCOperand const &Addend = potentialDuplex.getOperand(2);
+ if (Addend.isExpr())
+ return true;
+ return Addend.isImm() && !(isShiftedInt<7, 0>(Addend.getImm()));
+ }
+ if (Opc == Hexagon::A2_tfrsi) {
+ unsigned const Rd = potentialDuplex.getOperand(0).getReg();
+ if (!HexagonMCInstrInfo::isIntRegForSubInst(Rd))
+ return false;
+ MCOperand const &Value = potentialDuplex.getOperand(1);
+ if (Value.isExpr())
+ return true;
+ if (!Value.isImm())
+ return false;
+ // Rd = #-1 has its own form and is never extended.
+ if (Value.getImm() == -1)
+ return false;
+ // Otherwise the value must fit Rd = #u6.
+ return !isShiftedUInt<6, 0>(Value.getImm());
+ }
+ return false;
+/// Non-symmetrical. See if these two instructions are fit for a duplex pair in the given order (MIa first, MIb second); ExtendedA/ExtendedB say whether each currently has an extender.
+bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII,
+ MCInst const &MIa, bool ExtendedA,
+ MCInst const &MIb, bool ExtendedB,
+ bool bisReversable) {
+ // Slot 1 cannot be extended in duplexes PRM 10.5 -- NOTE(review): this test is on MIa, which the comments further down call slot 0; confirm the slot naming.
+ if (ExtendedA)
+ return false;
+ // Only A2_addi and A2_tfrsi can be extended in duplex form PRM 10.5
+ if (ExtendedB) {
+ unsigned Opcode = MIb.getOpcode();
+ if ((Opcode != Hexagon::A2_addi) && (Opcode != Hexagon::A2_tfrsi))
+ return false;
+ }
+ unsigned MIaG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIa),
+ MIbG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIb);
+ // If a duplex contains 2 insns in the same group, the insns must be
+ // ordered such that the numerically smaller opcode is in slot 1.
+ if ((MIaG != HexagonII::HSIG_None) && (MIaG == MIbG) && bisReversable) { // only enforced when the caller allows the pair to be swapped
+ MCInst SubInst0 = HexagonMCInstrInfo::deriveSubInst(MIa);
+ MCInst SubInst1 = HexagonMCInstrInfo::deriveSubInst(MIb);
+ unsigned zeroedSubInstS0 =
+ subinstOpcodeMap.find(SubInst0.getOpcode())->second; // NOTE(review): find() result is dereferenced without an end() check; relies on every derived opcode being in opcodeData
+ unsigned zeroedSubInstS1 =
+ subinstOpcodeMap.find(SubInst1.getOpcode())->second;
+ if (zeroedSubInstS0 < zeroedSubInstS1)
+ // subinstS0 (maps to slot 0) must be greater than
+ // subinstS1 (maps to slot 1)
+ return false;
+ }
+ // allocframe must always be in slot 0
+ if (MIb.getOpcode() == Hexagon::S2_allocframe)
+ return false;
+ if ((MIaG != HexagonII::HSIG_None) && (MIbG != HexagonII::HSIG_None)) {
+ // Prevent 2 instructions with extenders from duplexing
+ // Note that MIb (slot1) can be extended and MIa (slot0)
+ // can never be extended
+ if (subInstWouldBeExtended(MIa))
+ return false;
+ // If duplexing produces an extender, but the original did not
+ // have an extender, do not duplex.
+ if (subInstWouldBeExtended(MIb) && !ExtendedB)
+ return false;
+ }
+ // If jumpr r31 appears, it must be in slot 0, and never slot 1 (MIb).
+ if (MIbG == HexagonII::HSIG_L2) {
+ if ((MIb.getNumOperands() > 1) && MIb.getOperand(1).isReg() && // predicated forms carry r31 in operand 1
+ (MIb.getOperand(1).getReg() == Hexagon::R31))
+ return false;
+ if ((MIb.getNumOperands() > 0) && MIb.getOperand(0).isReg() && // unpredicated forms carry r31 in operand 0
+ (MIb.getOperand(0).getReg() == Hexagon::R31))
+ return false;
+ }
+ // If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb);
+ // therefore, not duplexable if slot 1 is a store, and slot 0 is not.
+ if ((MIbG == HexagonII::HSIG_S1) || (MIbG == HexagonII::HSIG_S2)) {
+ if ((MIaG != HexagonII::HSIG_S1) && (MIaG != HexagonII::HSIG_S2))
+ return false;
+ }
+ return (isDuplexPairMatch(MIaG, MIbG)); // finally, the two groups themselves must be a permitted ordered pair
+/// Symmetrical check: true when the candidate groups of the two instructions
+/// form a permitted duplex pair in at least one of the two orders.
+bool HexagonMCInstrInfo::isDuplexPair(MCInst const &MIa, MCInst const &MIb) {
+ unsigned const GroupA = getDuplexCandidateGroup(MIa);
+ unsigned const GroupB = getDuplexCandidateGroup(MIb);
+ if (isDuplexPairMatch(GroupA, GroupB))
+ return true;
+ return isDuplexPairMatch(GroupB, GroupA);
+/// Append operand number opNum of Inst to subInstPtr. A register operand must
+/// be one of the registers listed below; anything else is treated as
+/// unreachable. Non-register operands are copied without any check.
+inline static void addOps(MCInst &subInstPtr, MCInst const &Inst,
+ unsigned opNum) {
+ MCOperand const &Operand = Inst.getOperand(opNum);
+ if (Operand.isReg()) {
+ // Validate the register first; the copy itself is shared with the
+ // non-register path below.
+ switch (Operand.getReg()) {
+ case Hexagon::R0:
+ case Hexagon::R1:
+ case Hexagon::R2:
+ case Hexagon::R3:
+ case Hexagon::R4:
+ case Hexagon::R5:
+ case Hexagon::R6:
+ case Hexagon::R7:
+ case Hexagon::R16:
+ case Hexagon::R17:
+ case Hexagon::R18:
+ case Hexagon::R19:
+ case Hexagon::R20:
+ case Hexagon::R21:
+ case Hexagon::R22:
+ case Hexagon::R23:
+ case Hexagon::D0:
+ case Hexagon::D1:
+ case Hexagon::D2:
+ case Hexagon::D3:
+ case Hexagon::D8:
+ case Hexagon::D9:
+ case Hexagon::D10:
+ case Hexagon::D11:
+ break;
+ default:
+ llvm_unreachable("Not Duplexable Register");
+ }
+ }
+ subInstPtr.addOperand(Operand);
+MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { // Build the duplex sub-instruction (V4_S*) for Inst, copying only the operands that sub-instruction keeps.
+ MCInst Result;
+ switch (Inst.getOpcode()) {
+ default:
+ // dbgs() << "opcode: "<< Inst->getOpcode() << "\n";
+ llvm_unreachable("Unimplemented subinstruction \n");
+ break;
+ case Hexagon::A2_addi:
+ if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == 1) {
+ Result.setOpcode(Hexagon::V4_SA1_inc);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break;
+ } // 1,2 SUBInst $Rd = add($Rs, #1)
+ else if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == -1) {
+ Result.setOpcode(Hexagon::V4_SA1_dec);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break;
+ } // 1,2 SUBInst $Rd = add($Rs,#-1)
+ else if (Inst.getOperand(1).getReg() == Hexagon::R29) {
+ Result.setOpcode(Hexagon::V4_SA1_addsp);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break;
+ } // 1,3 SUBInst $Rd = add(r29, #$u6_2)
+ else {
+ Result.setOpcode(Hexagon::V4_SA1_addi);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break;
+ } // 1,2,3 SUBInst $Rx = add($Rx, #$s7)
+ case Hexagon::A2_add:
+ Result.setOpcode(Hexagon::V4_SA1_addrx);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst $Rx = add($_src_, $Rs)
+ case Hexagon::S2_allocframe:
+ Result.setOpcode(Hexagon::V4_SS2_allocframe);
+ addOps(Result, Inst, 0);
+ break; // 1 SUBInst allocframe(#$u5_3)
+ case Hexagon::A2_andir:
+ if (Inst.getOperand(2).getImm() == 255) {
+ Result.setOpcode(Hexagon::V4_SA1_zxtb);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 $Rd = and($Rs, #255)
+ } else { // every immediate other than 255 is mapped to and1
+ Result.setOpcode(Hexagon::V4_SA1_and1);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst $Rd = and($Rs, #1)
+ }
+ case Hexagon::C2_cmpeqi:
+ Result.setOpcode(Hexagon::V4_SA1_cmpeqi);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 2,3 SUBInst p0 = cmp.eq($Rs, #$u2)
+ case Hexagon::A4_combineii:
+ case Hexagon::A2_combineii:
+ if (Inst.getOperand(1).getImm() == 1) {
+ Result.setOpcode(Hexagon::V4_SA1_combine1i);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break; // 1,3 SUBInst $Rdd = combine(#1, #$u2)
+ }
+ if (Inst.getOperand(1).getImm() == 3) {
+ Result.setOpcode(Hexagon::V4_SA1_combine3i);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break; // 1,3 SUBInst $Rdd = combine(#3, #$u2)
+ }
+ if (Inst.getOperand(1).getImm() == 0) {
+ Result.setOpcode(Hexagon::V4_SA1_combine0i);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break; // 1,3 SUBInst $Rdd = combine(#0, #$u2)
+ }
+ if (Inst.getOperand(1).getImm() == 2) {
+ Result.setOpcode(Hexagon::V4_SA1_combine2i);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break; // 1,3 SUBInst $Rdd = combine(#2, #$u2)
+ } // NOTE(review): no break after this chain; an immediate outside 0..3 falls through into A4_combineir
+ case Hexagon::A4_combineir:
+ Result.setOpcode(Hexagon::V4_SA1_combinezr);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break; // 1,3 SUBInst $Rdd = combine(#0, $Rs)
+ case Hexagon::A4_combineri:
+ Result.setOpcode(Hexagon::V4_SA1_combinerz);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst $Rdd = combine($Rs, #0)
+ case Hexagon::L4_return_tnew_pnt:
+ case Hexagon::L4_return_tnew_pt:
+ Result.setOpcode(Hexagon::V4_SL2_return_tnew);
+ break; // none SUBInst if (p0.new) dealloc_return:nt
+ case Hexagon::L4_return_fnew_pnt:
+ case Hexagon::L4_return_fnew_pt:
+ Result.setOpcode(Hexagon::V4_SL2_return_fnew);
+ break; // none SUBInst if (!p0.new) dealloc_return:nt
+ case Hexagon::L4_return_f:
+ Result.setOpcode(Hexagon::V4_SL2_return_f);
+ break; // none SUBInst if (!p0) dealloc_return
+ case Hexagon::L4_return_t:
+ Result.setOpcode(Hexagon::V4_SL2_return_t);
+ break; // none SUBInst if (p0) dealloc_return
+ case Hexagon::L4_return:
+ Result.setOpcode(Hexagon::V4_SL2_return);
+ break; // none SUBInst dealloc_return
+ case Hexagon::L2_deallocframe:
+ Result.setOpcode(Hexagon::V4_SL2_deallocframe);
+ break; // none SUBInst deallocframe
+ case Hexagon::EH_RETURN_JMPR:
+ case Hexagon::J2_jumpr:
+ case Hexagon::JMPret:
+ Result.setOpcode(Hexagon::V4_SL2_jumpr31);
+ break; // none SUBInst jumpr r31
+ case Hexagon::J2_jumprf:
+ case Hexagon::JMPretf:
+ Result.setOpcode(Hexagon::V4_SL2_jumpr31_f);
+ break; // none SUBInst if (!p0) jumpr r31
+ case Hexagon::J2_jumprfnew:
+ case Hexagon::JMPretfnewpt:
+ case Hexagon::JMPretfnew:
+ Result.setOpcode(Hexagon::V4_SL2_jumpr31_fnew);
+ break; // none SUBInst if (!p0.new) jumpr:nt r31
+ case Hexagon::J2_jumprt:
+ case Hexagon::JMPrett:
+ Result.setOpcode(Hexagon::V4_SL2_jumpr31_t);
+ break; // none SUBInst if (p0) jumpr r31
+ case Hexagon::J2_jumprtnew:
+ case Hexagon::JMPrettnewpt:
+ case Hexagon::JMPrettnew:
+ Result.setOpcode(Hexagon::V4_SL2_jumpr31_tnew);
+ break; // none SUBInst if (p0.new) jumpr:nt r31
+ case Hexagon::L2_loadrb_io:
+ Result.setOpcode(Hexagon::V4_SL2_loadrb_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst $Rd = memb($Rs + #$u3_0)
+ case Hexagon::L2_loadrd_io:
+ Result.setOpcode(Hexagon::V4_SL2_loadrd_sp);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break; // 1,3 SUBInst $Rdd = memd(r29 + #$u5_3)
+ case Hexagon::L2_loadrh_io:
+ Result.setOpcode(Hexagon::V4_SL2_loadrh_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst $Rd = memh($Rs + #$u3_1)
+ case Hexagon::L2_loadrub_io:
+ Result.setOpcode(Hexagon::V4_SL1_loadrub_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst $Rd = memub($Rs + #$u4_0)
+ case Hexagon::L2_loadruh_io:
+ Result.setOpcode(Hexagon::V4_SL2_loadruh_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst $Rd = memuh($Rs + #$u3_1)
+ case Hexagon::L2_loadri_io:
+ if (Inst.getOperand(1).getReg() == Hexagon::R29) {
+ Result.setOpcode(Hexagon::V4_SL2_loadri_sp);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break; // 2 1,3 SUBInst $Rd = memw(r29 + #$u5_2)
+ } else {
+ Result.setOpcode(Hexagon::V4_SL1_loadri_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst $Rd = memw($Rs + #$u4_2)
+ }
+ case Hexagon::S4_storeirb_io:
+ if (Inst.getOperand(2).getImm() == 0) {
+ Result.setOpcode(Hexagon::V4_SS2_storebi0);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst memb($Rs + #$u4_0)=#0
+ } else if (Inst.getOperand(2).getImm() == 1) {
+ Result.setOpcode(Hexagon::V4_SS2_storebi1);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 2 1,2 SUBInst memb($Rs + #$u4_0)=#1
+ } // NOTE(review): no break here; a stored immediate other than 0 or 1 falls through into S2_storerb_io
+ case Hexagon::S2_storerb_io:
+ Result.setOpcode(Hexagon::V4_SS1_storeb_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt
+ case Hexagon::S2_storerd_io:
+ Result.setOpcode(Hexagon::V4_SS2_stored_sp);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 2,3 SUBInst memd(r29 + #$s6_3) = $Rtt
+ case Hexagon::S2_storerh_io:
+ Result.setOpcode(Hexagon::V4_SS2_storeh_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst memh($Rs + #$u3_1) = $Rt
+ case Hexagon::S4_storeiri_io:
+ if (Inst.getOperand(2).getImm() == 0) {
+ Result.setOpcode(Hexagon::V4_SS2_storewi0);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#0
+ } else if (Inst.getOperand(2).getImm() == 1) {
+ Result.setOpcode(Hexagon::V4_SS2_storewi1);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#1
+ } else if (Inst.getOperand(0).getReg() == Hexagon::R29) {
+ Result.setOpcode(Hexagon::V4_SS2_storew_sp);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1 2,3 SUBInst memw(r29 + #$u5_2) = $Rt
+ } // NOTE(review): no break here; any remaining case falls through into S2_storeri_io
+ case Hexagon::S2_storeri_io:
+ if (Inst.getOperand(0).getReg() == Hexagon::R29) {
+ Result.setOpcode(Hexagon::V4_SS2_storew_sp);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2); // 1,2,3 SUBInst memw(sp + #$u5_2) = $Rt
+ } else {
+ Result.setOpcode(Hexagon::V4_SS1_storew_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2); // 1,2,3 SUBInst memw($Rs + #$u4_2) = $Rt
+ }
+ break;
+ case Hexagon::A2_sxtb:
+ Result.setOpcode(Hexagon::V4_SA1_sxtb);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst $Rd = sxtb($Rs)
+ case Hexagon::A2_sxth:
+ Result.setOpcode(Hexagon::V4_SA1_sxth);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst $Rd = sxth($Rs)
+ case Hexagon::A2_tfr:
+ Result.setOpcode(Hexagon::V4_SA1_tfr);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst $Rd = $Rs
+ case Hexagon::C2_cmovenewif:
+ Result.setOpcode(Hexagon::V4_SA1_clrfnew);
+ addOps(Result, Inst, 0);
+ break; // 2 SUBInst if (!p0.new) $Rd = #0
+ case Hexagon::C2_cmovenewit:
+ Result.setOpcode(Hexagon::V4_SA1_clrtnew);
+ addOps(Result, Inst, 0);
+ break; // 2 SUBInst if (p0.new) $Rd = #0
+ case Hexagon::C2_cmoveif:
+ Result.setOpcode(Hexagon::V4_SA1_clrf);
+ addOps(Result, Inst, 0);
+ break; // 2 SUBInst if (!p0) $Rd = #0
+ case Hexagon::C2_cmoveit:
+ Result.setOpcode(Hexagon::V4_SA1_clrt);
+ addOps(Result, Inst, 0);
+ break; // 2 SUBInst if (p0) $Rd = #0
+ case Hexagon::A2_tfrsi:
+ if (Inst.getOperand(1).isImm() && Inst.getOperand(1).getImm() == -1) {
+ Result.setOpcode(Hexagon::V4_SA1_setin1);
+ addOps(Result, Inst, 0);
+ break; // 2 1 SUBInst $Rd = #-1
+ } else {
+ Result.setOpcode(Hexagon::V4_SA1_seti);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst $Rd = #$u6
+ }
+ case Hexagon::A2_zxtb:
+ Result.setOpcode(Hexagon::V4_SA1_zxtb);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 $Rd = and($Rs, #255)
+ case Hexagon::A2_zxth:
+ Result.setOpcode(Hexagon::V4_SA1_zxth);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst $Rd = zxth($Rs)
+ }
+ return Result;
+/// True for the opcodes this file treats as stores when deciding whether a
+/// pair may be reordered. Note that S2_allocframe is counted among them.
+static bool isStoreInst(unsigned opCode) {
+ bool const IsRegisterStore =
+ opCode == Hexagon::S2_storeri_io || opCode == Hexagon::S2_storerb_io ||
+ opCode == Hexagon::S2_storerh_io || opCode == Hexagon::S2_storerd_io;
+ bool const IsImmediateStore =
+ opCode == Hexagon::S4_storeiri_io || opCode == Hexagon::S4_storeirb_io;
+ return IsRegisterStore || IsImmediateStore ||
+ opCode == Hexagon::S2_allocframe;
+SmallVector<DuplexCandidate, 8>
+HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
+ MCInst const &MCB) { // Collect every operand pair (j, k) of bundle MCB that passes isOrderedDuplexPair in one of the two orders.
+ assert(isBundle(MCB));
+ SmallVector<DuplexCandidate, 8> duplexToTry;
+ // Use an "order matters" version of isDuplexPair.
+ unsigned numInstrInPacket = MCB.getNumOperands();
+ for (unsigned distance = 1; distance < numInstrInPacket; ++distance) { // pairs are visited by increasing operand distance
+ for (unsigned j = HexagonMCInstrInfo::bundleInstructionsOffset,
+ k = j + distance;
+ (j < numInstrInPacket) && (k < numInstrInPacket); ++j, ++k) {
+ // Check if reversable: two store-like instructions are never tried in swapped order.
+ bool bisReversable = true;
+ if (isStoreInst(MCB.getOperand(j).getInst()->getOpcode()) &&
+ isStoreInst(MCB.getOperand(k).getInst()->getOpcode())) {
+ DEBUG(dbgs() << "skip out of order write pair: " << k << "," << j
+ << "\n");
+ bisReversable = false;
+ }
+ // Try in order: k is passed as MIa and j as MIb.
+ if (isOrderedDuplexPair(
+ MCII, *MCB.getOperand(k).getInst(),
+ HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1),
+ *MCB.getOperand(j).getInst(),
+ HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1),
+ bisReversable)) {
+ // Get iClass.
+ unsigned iClass = iClassOfDuplexPair(
+ getDuplexCandidateGroup(*MCB.getOperand(k).getInst()),
+ getDuplexCandidateGroup(*MCB.getOperand(j).getInst()));
+ // Save off pairs for duplex checking.
+ duplexToTry.push_back(DuplexCandidate(j, k, iClass));
+ DEBUG(dbgs() << "adding pair: " << j << "," << k << ":"
+ << MCB.getOperand(j).getInst()->getOpcode() << ","
+ << MCB.getOperand(k).getInst()->getOpcode() << "\n");
+ continue; // the in-order form was accepted, so the reverse order is not tried
+ } else {
+ DEBUG(dbgs() << "skipping pair: " << j << "," << k << ":"
+ << MCB.getOperand(j).getInst()->getOpcode() << ","
+ << MCB.getOperand(k).getInst()->getOpcode() << "\n");
+ }
+ // Try reverse: j is passed as MIa and k as MIb.
+ if (bisReversable) {
+ if (isOrderedDuplexPair(
+ MCII, *MCB.getOperand(j).getInst(),
+ HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1),
+ *MCB.getOperand(k).getInst(),
+ HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1),
+ bisReversable)) {
+ // Get iClass.
+ unsigned iClass = iClassOfDuplexPair(
+ getDuplexCandidateGroup(*MCB.getOperand(j).getInst()),
+ getDuplexCandidateGroup(*MCB.getOperand(k).getInst()));
+ // Save off pairs for duplex checking.
+ duplexToTry.push_back(DuplexCandidate(k, j, iClass)); // NOTE(review): the debug line below prints the indices as k,j but the opcodes in j,k order
+ DEBUG(dbgs() << "adding pair:" << k << "," << j << ":"
+ << MCB.getOperand(j).getInst()->getOpcode() << ","
+ << MCB.getOperand(k).getInst()->getOpcode() << "\n");
+ } else {
+ DEBUG(dbgs() << "skipping pair: " << k << "," << j << ":"
+ << MCB.getOperand(j).getInst()->getOpcode() << ","
+ << MCB.getOperand(k).getInst()->getOpcode() << "\n");
+ }
+ }
+ }
+ }
+ return duplexToTry;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index 93c7a0d..2731278 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -12,12 +12,53 @@
#include "HexagonMCInstrInfo.h"
+#include "Hexagon.h"
#include "HexagonBaseInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
-void HexagonMCInstrInfo::AppendImplicitOperands(MCInst &MCI) {
- MCI.addOperand(MCOperand::createImm(0));
- MCI.addOperand(MCOperand::createInst(nullptr));
+HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) {
+ assert(isBundle(MCI));
+ return iterator_range<MCInst::const_iterator>(
+ MCI.begin() + bundleInstructionsOffset, MCI.end());
+size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) {
+ if (HexagonMCInstrInfo::isBundle(MCI))
+ return (MCI.size() - bundleInstructionsOffset);
+ else
+ return (1);
+MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
+ MCInst const &inst0,
+ MCInst const &inst1) {
+ assert((iClass <= 0xf) && "iClass must have range of 0 to 0xf");
+ MCInst *duplexInst = new (Context) MCInst;
+ duplexInst->setOpcode(Hexagon::DuplexIClass0 + iClass);
+ MCInst *SubInst0 = new (Context) MCInst(deriveSubInst(inst0));
+ MCInst *SubInst1 = new (Context) MCInst(deriveSubInst(inst1));
+ duplexInst->addOperand(MCOperand::createInst(SubInst0));
+ duplexInst->addOperand(MCOperand::createInst(SubInst1));
+ return duplexInst;
+MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB,
+ size_t Index) {
+ assert(Index <= bundleSize(MCB));
+ if (Index == 0)
+ return nullptr;
+ MCInst const *Inst =
+ MCB.getOperand(Index + bundleInstructionsOffset - 1).getInst();
+ if (isImmext(*Inst))
+ return Inst;
+ return nullptr;
@@ -46,6 +87,24 @@ MCInstrDesc const &HexagonMCInstrInfo::getDesc(MCInstrInfo const &MCII,
return (MCII.get(MCI.getOpcode()));
+unsigned short HexagonMCInstrInfo::getExtendableOp(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
+MCOperand const &
+HexagonMCInstrInfo::getExtendableOperand(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ unsigned O = HexagonMCInstrInfo::getExtendableOp(MCII, MCI);
+ MCOperand const &MO = MCI.getOperand(O);
+ assert((HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
+ HexagonMCInstrInfo::isExtended(MCII, MCI)) &&
+ (MO.isImm() || MO.isExpr()));
+ return (MO);
unsigned HexagonMCInstrInfo::getExtentAlignment(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -58,12 +117,6 @@ unsigned HexagonMCInstrInfo::getExtentBits(MCInstrInfo const &MCII,
return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
-std::bitset<16> HexagonMCInstrInfo::GetImplicitBits(MCInst const &MCI) {
- SanityCheckImplicitOperands(MCI);
- std::bitset<16> Bits(MCI.getOperand(MCI.getNumOperands() - 2).getImm());
- return Bits;
// Return the max value that a constant extendable operand can have
// without being extended.
int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII,
@@ -99,9 +152,14 @@ char const *HexagonMCInstrInfo::getName(MCInstrInfo const &MCII,
return MCII.getName(MCI.getOpcode());
-// Return the operand that consumes or produces a new value.
-MCOperand const &HexagonMCInstrInfo::getNewValue(MCInstrInfo const &MCII,
+unsigned short HexagonMCInstrInfo::getNewValueOp(MCInstrInfo const &MCII,
MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask);
+MCOperand const &HexagonMCInstrInfo::getNewValueOperand(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
unsigned const O =
(F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask;
@@ -113,6 +171,21 @@ MCOperand const &HexagonMCInstrInfo::getNewValue(MCInstrInfo const &MCII,
return (MCO);
+int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ HexagonII::SubTarget Target = static_cast<HexagonII::SubTarget>(
+ (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask);
+ switch (Target) {
+ default:
+ return Hexagon::ArchV4;
+ case HexagonII::HasV5SubT:
+ return Hexagon::ArchV5;
+ }
// Return the Hexagon ISA class for the insn.
unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII,
MCInst const &MCI) {
@@ -121,6 +194,32 @@ unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII,
return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
+unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
+ MCInst const &MCI) {
+ const InstrItinerary *II = STI.getSchedModel().InstrItineraries;
+ int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
+ return ((II[SchedClass].FirstStage + HexagonStages)->getUnits());
+bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) {
+ if (!HexagonMCInstrInfo::isBundle(MCI))
+ return false;
+ for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) {
+ auto MI = I.getInst();
+ if (isImmext(*MI))
+ return true;
+ }
+ return false;
+bool HexagonMCInstrInfo::hasExtenderForIndex(MCInst const &MCB, size_t Index) {
+ return extenderForIndex(MCB, Index) != nullptr;
// Return whether the instruction is a legal new-value producer.
bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII,
MCInst const &MCI) {
@@ -128,6 +227,18 @@ bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII,
return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask);
+MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) {
+ assert(isBundle(MCB));
+ assert(Index < HEXAGON_PACKET_SIZE);
+ return *MCB.getOperand(bundleInstructionsOffset + Index).getInst();
+bool HexagonMCInstrInfo::isBundle(MCInst const &MCI) {
+ auto Result = Hexagon::BUNDLE == MCI.getOpcode();
+ assert(!Result || (MCI.size() > 0 && MCI.getOperand(0).isImm()));
+ return Result;
// Return whether the insn is an actual insn.
bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) {
return (!HexagonMCInstrInfo::getDesc(MCII, MCI).isPseudo() &&
@@ -135,6 +246,15 @@ bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) {
HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP);
+bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) {
+ return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) ||
+ (Reg >= Hexagon::D8 && Reg <= Hexagon::D11));
+bool HexagonMCInstrInfo::isDuplex(MCInstrInfo const &MCII, MCInst const &MCI) {
+ return HexagonII::TypeDUPLEX == HexagonMCInstrInfo::getType(MCII, MCI);
// Return whether the instruction needs to be constant extended.
// 1) Always return true if the instruction has 'isExtended' flag set.
@@ -173,20 +293,44 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
return (ImmValue < MinValue || ImmValue > MaxValue);
-// Return true if the instruction may be extended based on the operand value.
bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII,
MCInst const &MCI) {
uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
-// Return whether the instruction must be always extended.
bool HexagonMCInstrInfo::isExtended(MCInstrInfo const &MCII,
MCInst const &MCI) {
uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+bool HexagonMCInstrInfo::isFloat(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::FPPos) & HexagonII::FPMask);
+bool HexagonMCInstrInfo::isImmext(MCInst const &MCI) {
+ auto Op = MCI.getOpcode();
+ return (Op == Hexagon::A4_ext_b || Op == Hexagon::A4_ext_c ||
+ Op == Hexagon::A4_ext_g || Op == Hexagon::A4_ext);
+bool HexagonMCInstrInfo::isInnerLoop(MCInst const &MCI) {
+ assert(isBundle(MCI));
+ int64_t Flags = MCI.getOperand(0).getImm();
+ return (Flags & innerLoopMask) != 0;
+bool HexagonMCInstrInfo::isIntReg(unsigned Reg) {
+ return (Reg >= Hexagon::R0 && Reg <= Hexagon::R31);
+bool HexagonMCInstrInfo::isIntRegForSubInst(unsigned Reg) {
+ return ((Reg >= Hexagon::R0 && Reg <= Hexagon::R7) ||
+ (Reg >= Hexagon::R16 && Reg <= Hexagon::R23));
// Return whether the insn is a new-value consumer.
bool HexagonMCInstrInfo::isNewValue(MCInstrInfo const &MCII,
MCInst const &MCI) {
@@ -203,46 +347,103 @@ bool HexagonMCInstrInfo::isOperandExtended(MCInstrInfo const &MCII,
-bool HexagonMCInstrInfo::isPacketBegin(MCInst const &MCI) {
- std::bitset<16> Bits(GetImplicitBits(MCI));
- return Bits.test(packetBeginIndex);
+bool HexagonMCInstrInfo::isOuterLoop(MCInst const &MCI) {
+ assert(isBundle(MCI));
+ int64_t Flags = MCI.getOperand(0).getImm();
+ return (Flags & outerLoopMask) != 0;
-bool HexagonMCInstrInfo::isPacketEnd(MCInst const &MCI) {
- std::bitset<16> Bits(GetImplicitBits(MCI));
- return Bits.test(packetEndIndex);
+bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+bool HexagonMCInstrInfo::isPredicatedTrue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (
+ !((F >> HexagonII::PredicatedFalsePos) & HexagonII::PredicatedFalseMask));
+bool HexagonMCInstrInfo::isPredReg(unsigned Reg) {
+ return (Reg >= Hexagon::P0 && Reg <= Hexagon::P3_0);
-// Return whether the insn is a prefix.
bool HexagonMCInstrInfo::isPrefix(MCInstrInfo const &MCII, MCInst const &MCI) {
return (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypePREFIX);
-// Return whether the insn is solo, i.e., cannot be in a packet.
bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
-void HexagonMCInstrInfo::resetPacket(MCInst &MCI) {
- setPacketBegin(MCI, false);
- setPacketEnd(MCI, false);
+bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask);
+bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask);
+void HexagonMCInstrInfo::padEndloop(MCInst &MCB) {
+ MCInst Nop;
+ Nop.setOpcode(Hexagon::A2_nop);
+ assert(isBundle(MCB));
+ while ((HexagonMCInstrInfo::isInnerLoop(MCB) &&
+ (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_INNER_SIZE)) ||
+ ((HexagonMCInstrInfo::isOuterLoop(MCB) &&
+ (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_OUTER_SIZE))))
+ MCB.addOperand(MCOperand::createInst(new MCInst(Nop)));
+bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ if (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR)
+ return false;
+ unsigned SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
+ switch (SchedClass) {
+ case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123:
+ case Hexagon::Sched::ALU64_tc_2_SLOT23:
+ case Hexagon::Sched::ALU64_tc_3x_SLOT23:
+ case Hexagon::Sched::M_tc_2_SLOT23:
+ case Hexagon::Sched::M_tc_3x_SLOT23:
+ case Hexagon::Sched::S_2op_tc_2_SLOT23:
+ case Hexagon::Sched::S_3op_tc_2_SLOT23:
+ case Hexagon::Sched::S_3op_tc_3x_SLOT23:
+ return true;
+ }
+ return false;
-void HexagonMCInstrInfo::SetImplicitBits(MCInst &MCI, std::bitset<16> Bits) {
- SanityCheckImplicitOperands(MCI);
- MCI.getOperand(MCI.getNumOperands() - 2).setImm(Bits.to_ulong());
+void HexagonMCInstrInfo::replaceDuplex(MCContext &Context, MCInst &MCB,
+ DuplexCandidate Candidate) {
+ assert(Candidate.packetIndexI < MCB.size());
+ assert(Candidate.packetIndexJ < MCB.size());
+ assert(isBundle(MCB));
+ MCInst *Duplex =
+ deriveDuplex(Context, Candidate.iClass,
+ *MCB.getOperand(Candidate.packetIndexJ).getInst(),
+ *MCB.getOperand(Candidate.packetIndexI).getInst());
+ assert(Duplex != nullptr);
+ MCB.getOperand(Candidate.packetIndexI).setInst(Duplex);
+ MCB.erase(MCB.begin() + Candidate.packetIndexJ);
-void HexagonMCInstrInfo::setPacketBegin(MCInst &MCI, bool f) {
- std::bitset<16> Bits(GetImplicitBits(MCI));
- Bits.set(packetBeginIndex, f);
- SetImplicitBits(MCI, Bits);
+void HexagonMCInstrInfo::setInnerLoop(MCInst &MCI) {
+ assert(isBundle(MCI));
+ MCOperand &Operand = MCI.getOperand(0);
+ Operand.setImm(Operand.getImm() | innerLoopMask);
-void HexagonMCInstrInfo::setPacketEnd(MCInst &MCI, bool f) {
- std::bitset<16> Bits(GetImplicitBits(MCI));
- Bits.set(packetEndIndex, f);
- SetImplicitBits(MCI, Bits);
+void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) {
+ assert(isBundle(MCI));
+ MCOperand &Operand = MCI.getOperand(0);
+ Operand.setImm(Operand.getImm() | outerLoopMask);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 082c80d..09f305f 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -1,4 +1,4 @@
-//===- HexagonMCInstrInfo.cpp - Hexagon sub-class of MCInst ---------------===//
+//===- HexagonMCInstrInfo.h - Utility functions on Hexagon MCInsts --------===//
// The LLVM Compiler Infrastructure
@@ -15,20 +15,47 @@
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include <bitset>
namespace llvm {
+class MCContext;
class MCInstrDesc;
class MCInstrInfo;
class MCInst;
class MCOperand;
+class MCSubtargetInfo;
namespace HexagonII {
enum class MemAccessSize;
+class DuplexCandidate {
+ unsigned packetIndexI, packetIndexJ, iClass;
+ DuplexCandidate(unsigned i, unsigned j, unsigned iClass)
+ : packetIndexI(i), packetIndexJ(j), iClass(iClass) {}
namespace HexagonMCInstrInfo {
-void AppendImplicitOperands(MCInst &MCI);
+size_t const innerLoopOffset = 0;
+int64_t const innerLoopMask = 1 << innerLoopOffset;
+size_t const outerLoopOffset = 1;
+int64_t const outerLoopMask = 1 << outerLoopOffset;
+size_t const bundleInstructionsOffset = 1;
+// Returns the number of instructions in the bundle
+size_t bundleSize(MCInst const &MCI);
+// Returns an iterator range of instructions in this bundle
+iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI);
+// Return the extender for instruction at Index or nullptr if none
+MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
+// Create a duplex instruction given the two subinsts
+MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0,
+ MCInst const &inst1);
+// Convert this instruction into a duplex subinst
+MCInst deriveSubInst(MCInst const &Inst);
// Return memory access size
HexagonII::MemAccessSize getAccessSize(MCInstrInfo const &MCII,
@@ -42,14 +69,26 @@ unsigned short getCExtOpNum(MCInstrInfo const &MCII, MCInst const &MCI);
MCInstrDesc const &getDesc(MCInstrInfo const &MCII, MCInst const &MCI);
+// Return which duplex group this instruction belongs to
+unsigned getDuplexCandidateGroup(MCInst const &MI);
+// Return a list of all possible instruction duplex combinations
+SmallVector<DuplexCandidate, 8> getDuplexPossibilties(MCInstrInfo const &MCII,
+ MCInst const &MCB);
+// Return the index of the extendable operand
+unsigned short getExtendableOp(MCInstrInfo const &MCII, MCInst const &MCI);
+// Return a reference to the extendable operand
+MCOperand const &getExtendableOperand(MCInstrInfo const &MCII,
+ MCInst const &MCI);
// Return the implicit alignment of the extendable operand
unsigned getExtentAlignment(MCInstrInfo const &MCII, MCInst const &MCI);
// Return the number of logical bits of the extendable operand
unsigned getExtentBits(MCInstrInfo const &MCII, MCInst const &MCI);
-std::bitset<16> GetImplicitBits(MCInst const &MCI);
// Return the max value that a constant extendable operand can have
// without being extended.
int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -61,27 +100,77 @@ int getMinValue(MCInstrInfo const &MCII, MCInst const &MCI);
// Return instruction name
char const *getName(MCInstrInfo const &MCII, MCInst const &MCI);
+// Return the operand index for the new value.
+unsigned short getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI);
// Return the operand that consumes or produces a new value.
-MCOperand const &getNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI);
+int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI);
// Return the Hexagon ISA class for the insn.
unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI);
+/// Return the slots used by the insn.
+unsigned getUnits(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst const &MCI);
+// Does the packet have an extender for the instruction at Index
+bool hasExtenderForIndex(MCInst const &MCB, size_t Index);
+bool hasImmExt(MCInst const &MCI);
// Return whether the instruction is a legal new-value producer.
bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+// Return the instruction at Index
+MCInst const &instruction(MCInst const &MCB, size_t Index);
+// Returns whether this MCInst is a well-formed bundle
+bool isBundle(MCInst const &MCI);
// Return whether the insn is an actual insn.
bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
+// Return the duplex iclass given the two duplex classes
+unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb);
// Return whether the instruction needs to be constant extended.
bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);
+// Is this double register suitable for use in a duplex subinst
+bool isDblRegForSubInst(unsigned Reg);
+// Is this a duplex instruction
+bool isDuplex(MCInstrInfo const &MCII, MCInst const &MCI);
+// Can these instructions be duplexed
+bool isDuplexPair(MCInst const &MIa, MCInst const &MIb);
+// Can these duplex classes be combined into a duplex instruction
+bool isDuplexPairMatch(unsigned Ga, unsigned Gb);
// Return true if the insn may be extended based on the operand value.
bool isExtendable(MCInstrInfo const &MCII, MCInst const &MCI);
// Return whether the instruction must be always extended.
bool isExtended(MCInstrInfo const &MCII, MCInst const &MCI);
+/// Return whether it is a floating-point insn.
+bool isFloat(MCInstrInfo const &MCII, MCInst const &MCI);
+// Returns whether this instruction is an immediate extender
+bool isImmext(MCInst const &MCI);
+// Returns whether this bundle is an endloop0
+bool isInnerLoop(MCInst const &MCI);
+// Is this an integer register
+bool isIntReg(unsigned Reg);
+// Is this register suitable for use in a duplex subinst
+bool isIntRegForSubInst(unsigned Reg);
// Return whether the insn is a new-value consumer.
bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -89,9 +178,22 @@ bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
bool isOperandExtended(MCInstrInfo const &MCII, MCInst const &MCI,
unsigned short OperandNum);
-bool isPacketBegin(MCInst const &MCI);
+// Can these two instructions be duplexed
+bool isOrderedDuplexPair(MCInstrInfo const &MCII, MCInst const &MIa,
+ bool ExtendedA, MCInst const &MIb, bool ExtendedB,
+ bool bisReversable);
+// Returns whether this bundle is an endloop1
+bool isOuterLoop(MCInst const &MCI);
+// Return whether this instruction is predicated
+bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI);
+// Return whether the predicate sense is true
+bool isPredicatedTrue(MCInstrInfo const &MCII, MCInst const &MCI);
-bool isPacketEnd(MCInst const &MCI);
+// Is this a predicate register
+bool isPredReg(unsigned Reg);
// Return whether the insn is a prefix.
bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -99,23 +201,31 @@ bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI);
// Return whether the insn is solo, i.e., cannot be in a packet.
bool isSolo(MCInstrInfo const &MCII, MCInst const &MCI);
-static const size_t packetBeginIndex = 0;
-static const size_t packetEndIndex = 1;
+/// Return whether the insn can be packaged only with A and X-type insns.
+bool isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI);
-void resetPacket(MCInst &MCI);
+/// Return whether the insn can be packaged only with an A-type insn in slot #1.
+bool isSoloAin1(MCInstrInfo const &MCII, MCInst const &MCI);
-inline void SanityCheckImplicitOperands(MCInst const &MCI) {
- assert(MCI.getNumOperands() >= 2 && "At least the two implicit operands");
- assert(MCI.getOperand(MCI.getNumOperands() - 1).isInst() &&
- "Implicit bits and flags");
- assert(MCI.getOperand(MCI.getNumOperands() - 2).isImm() && "Parent pointer");
+// Pad the bundle with nops to satisfy endloop requirements
+void padEndloop(MCInst &MCI);
+bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI);
+// Replace the instructions inside MCB, represented by Candidate
+void replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate);
+// Marks a bundle as endloop0
+void setInnerLoop(MCInst &MCI);
-void SetImplicitBits(MCInst &MCI, std::bitset<16> Bits);
+// Marks a bundle as endloop1
+void setOuterLoop(MCInst &MCI);
-void setPacketBegin(MCInst &MCI, bool Y);
+// Would duplexing this instruction create a requirement to extend
+bool subInstWouldBeExtended(MCInst const &potentialDuplex);
-void setPacketEnd(MCInst &MCI, bool Y);
+// Attempt to find and replace compound pairs
+void tryCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
new file mode 100644
index 0000000..8e70280
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
@@ -0,0 +1,237 @@
+//===----- HexagonMCShuffler.cpp - MC bundle shuffling --------------------===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This implements the shuffling of insns inside a bundle according to the
+// packet formation rules of the Hexagon ISA.
+#define DEBUG_TYPE "hexagon-shuffle"
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+static cl::opt<bool>
+ DisableShuffle("disable-hexagon-shuffle", cl::Hidden, cl::init(false),
+ cl::desc("Disable Hexagon instruction shuffling"));
+void HexagonMCShuffler::init(MCInst &MCB) {
+ if (HexagonMCInstrInfo::isBundle(MCB)) {
+ MCInst const *Extender = nullptr;
+ // Copy the bundle for the shuffling.
+ for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo());
+ MCInst *MI = const_cast<MCInst *>(I.getInst());
+ if (!HexagonMCInstrInfo::isImmext(*MI)) {
+ append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI),
+ false);
+ Extender = nullptr;
+ } else
+ Extender = MI;
+ }
+ }
+ BundleFlags = MCB.getOperand(0).getImm();
+void HexagonMCShuffler::init(MCInst &MCB, MCInst const *AddMI,
+ bool bInsertAtFront) {
+ if (HexagonMCInstrInfo::isBundle(MCB)) {
+ if (bInsertAtFront && AddMI)
+ append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI),
+ false);
+ MCInst const *Extender = nullptr;
+ // Copy the bundle for the shuffling.
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo());
+ MCInst *MI = const_cast<MCInst *>(I.getInst());
+ if (!HexagonMCInstrInfo::isImmext(*MI)) {
+ append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI),
+ false);
+ Extender = nullptr;
+ } else
+ Extender = MI;
+ }
+ if (!bInsertAtFront && AddMI)
+ append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI),
+ false);
+ }
+ BundleFlags = MCB.getOperand(0).getImm();
+void HexagonMCShuffler::copyTo(MCInst &MCB) {
+ MCB.clear();
+ MCB.addOperand(MCOperand::createImm(BundleFlags));
+ // Copy the results into the bundle.
+ for (HexagonShuffler::iterator I = begin(); I != end(); ++I) {
+ MCInst const *MI = I->getDesc();
+ MCInst const *Extender = I->getExtender();
+ if (Extender)
+ MCB.addOperand(MCOperand::createInst(Extender));
+ MCB.addOperand(MCOperand::createInst(MI));
+ }
+bool HexagonMCShuffler::reshuffleTo(MCInst &MCB) {
+ if (shuffle()) {
+ // Copy the results into the bundle.
+ copyTo(MCB);
+ } else
+ DEBUG(MCB.dump());
+ return (!getError());
+bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &MCB) {
+ HexagonMCShuffler MCS(MCII, STI, MCB);
+ if (DisableShuffle)
+ // Ignore if user chose so.
+ return false;
+ if (!HexagonMCInstrInfo::bundleSize(MCB)) {
+ // There once was a bundle:
+ // BUNDLE %D2<imp-def>, %R4<imp-def>, %R5<imp-def>, %D7<imp-def>, ...
+ // * %D2<def> = IMPLICIT_DEF; flags:
+ // * %D7<def> = IMPLICIT_DEF; flags:
+ // After the IMPLICIT_DEFs were removed by the asm printer, the bundle
+ // became empty.
+ DEBUG(dbgs() << "Skipping empty bundle");
+ return false;
+ } else if (!HexagonMCInstrInfo::isBundle(MCB)) {
+ DEBUG(dbgs() << "Skipping stand-alone insn");
+ return false;
+ }
+ // Reorder the bundle and copy the result.
+ if (!MCS.reshuffleTo(MCB)) {
+ // Unless there is any error, which should not happen at this point.
+ unsigned shuffleError = MCS.getError();
+ switch (shuffleError) {
+ default:
+ llvm_unreachable("unknown error");
+ case HexagonShuffler::SHUFFLE_ERROR_INVALID:
+ llvm_unreachable("invalid packet");
+ case HexagonShuffler::SHUFFLE_ERROR_STORES:
+ llvm_unreachable("too many stores");
+ case HexagonShuffler::SHUFFLE_ERROR_LOADS:
+ llvm_unreachable("too many loads");
+ case HexagonShuffler::SHUFFLE_ERROR_BRANCHES:
+ llvm_unreachable("too many branches");
+ case HexagonShuffler::SHUFFLE_ERROR_NOSLOTS:
+ llvm_unreachable("no suitable slot");
+ case HexagonShuffler::SHUFFLE_ERROR_SLOTS:
+ llvm_unreachable("over-subscribed slots");
+ case HexagonShuffler::SHUFFLE_SUCCESS: // Single instruction case.
+ return true;
+ }
+ }
+ return true;
+llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCContext &Context, MCInst &MCB,
+ SmallVector<DuplexCandidate, 8> possibleDuplexes) {
+ if (DisableShuffle)
+ return HexagonShuffler::SHUFFLE_SUCCESS;
+ if (!HexagonMCInstrInfo::bundleSize(MCB)) {
+ // There once was a bundle:
+ // BUNDLE %D2<imp-def>, %R4<imp-def>, %R5<imp-def>, %D7<imp-def>, ...
+ // * %D2<def> = IMPLICIT_DEF; flags:
+ // * %D7<def> = IMPLICIT_DEF; flags:
+ // After the IMPLICIT_DEFs were removed by the asm printer, the bundle
+ // became empty.
+ DEBUG(dbgs() << "Skipping empty bundle");
+ return HexagonShuffler::SHUFFLE_SUCCESS;
+ } else if (!HexagonMCInstrInfo::isBundle(MCB)) {
+ DEBUG(dbgs() << "Skipping stand-alone insn");
+ return HexagonShuffler::SHUFFLE_SUCCESS;
+ }
+ bool doneShuffling = false;
+ unsigned shuffleError;
+ while (possibleDuplexes.size() > 0 && (!doneShuffling)) {
+ // case of Duplex Found
+ DuplexCandidate duplexToTry = possibleDuplexes.pop_back_val();
+ MCInst Attempt(MCB);
+ HexagonMCInstrInfo::replaceDuplex(Context, Attempt, duplexToTry);
+ HexagonMCShuffler MCS(MCII, STI, Attempt); // copy packet to the shuffler
+ if (MCS.size() == 1) { // case of one duplex
+ // copy the created duplex in the shuffler to the bundle
+ MCS.copyTo(MCB);
+ doneShuffling = true;
+ return HexagonShuffler::SHUFFLE_SUCCESS;
+ }
+ // try shuffle with this duplex
+ doneShuffling = MCS.reshuffleTo(MCB);
+ shuffleError = MCS.getError();
+ if (doneShuffling)
+ break;
+ }
+ if (doneShuffling == false) {
+ HexagonMCShuffler MCS(MCII, STI, MCB);
+ doneShuffling = MCS.reshuffleTo(MCB); // shuffle
+ shuffleError = MCS.getError();
+ }
+ if (!doneShuffling)
+ return shuffleError;
+ return HexagonShuffler::SHUFFLE_SUCCESS;
+bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &MCB, MCInst const *AddMI, int fixupCount) {
+ if (!HexagonMCInstrInfo::isBundle(MCB) || !AddMI)
+ return false;
+ // if fixups present, make sure we don't insert too many nops that would
+ // later prevent an extender from being inserted.
+ unsigned int bundleSize = HexagonMCInstrInfo::bundleSize(MCB);
+ if (bundleSize >= HEXAGON_PACKET_SIZE)
+ return false;
+ if (fixupCount >= 2) {
+ return false;
+ } else {
+ if (bundleSize == HEXAGON_PACKET_SIZE - 1 && fixupCount)
+ return false;
+ }
+ if (DisableShuffle)
+ return false;
+ HexagonMCShuffler MCS(MCII, STI, MCB, AddMI);
+ if (!MCS.reshuffleTo(MCB)) {
+ unsigned shuffleError = MCS.getError();
+ switch (shuffleError) {
+ default:
+ return false;
+ case HexagonShuffler::SHUFFLE_SUCCESS: // single instruction case
+ return true;
+ }
+ }
+ return true;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
new file mode 100644
index 0000000..a21cce1
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
@@ -0,0 +1,65 @@
+//=-- HexagonMCShuffler.h ---------------------------------------------------=//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This declares the shuffling of insns inside a bundle according to the
+// packet formation rules of the Hexagon ISA.
+#include "MCTargetDesc/HexagonShuffler.h"
+namespace llvm {
+class MCInst;
+// Insn bundle shuffler.
+class HexagonMCShuffler : public HexagonShuffler {
+ bool immext_present;
+ bool duplex_present;
+ HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &MCB)
+ : HexagonShuffler(MCII, STI) {
+ init(MCB);
+ };
+ HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &MCB, const MCInst *AddMI,
+ bool bInsertAtFront = false)
+ : HexagonShuffler(MCII, STI) {
+ init(MCB, AddMI, bInsertAtFront);
+ };
+ // Copy reordered bundle to another.
+ void copyTo(MCInst &MCB);
+ // Reorder and copy result to another.
+ bool reshuffleTo(MCInst &MCB);
+ bool immextPresent() const { return immext_present; };
+ bool duplexPresent() const { return duplex_present; };
+ void init(MCInst &MCB);
+ void init(MCInst &MCB, const MCInst *AddMI, bool bInsertAtFront = false);
+// Invocation of the shuffler.
+bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &);
+bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &, const MCInst *, int);
+unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCContext &Context, MCInst &,
+ SmallVector<DuplexCandidate, 8>);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 59395e2..43734ed 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -55,7 +55,7 @@ createHexagonMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
- StringRef TT) {
+ const Triple &TT) {
MCAsmInfo *MAI = new HexagonMCAsmInfo(TT);
// VirtualFP = (R30 + #0).
@@ -112,11 +112,11 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
- // Register the MC Inst Printer
- TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget,
- createHexagonMCInstPrinter);
// Register the asm backend
+ // Register the MC Inst Printer
+ TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget,
+ createHexagonMCInstPrinter);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index de63fd2..81211cc 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -17,6 +17,8 @@
#include <cstdint>
namespace llvm {
+struct InstrItinerary;
+struct InstrStage;
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
@@ -31,6 +33,8 @@ class raw_pwrite_stream;
extern Target TheHexagonTarget;
+extern const InstrStage HexagonStages[];
MCInstrInfo *createHexagonMCInstrInfo();
MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII,
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
new file mode 100644
index 0000000..feaaa4f
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -0,0 +1,385 @@
+//===----- HexagonShuffler.cpp - Instruction bundle shuffling -------------===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This implements the shuffling of insns inside a bundle according to the
+// packet formation rules of the Hexagon ISA.
+#define DEBUG_TYPE "hexagon-shuffle"
+#include <algorithm>
+#include <utility>
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "HexagonShuffler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+// Insn shuffling priority.
+// Accumulates the bids placed on one slot; once the running total reaches MAX
+// the slot is reported as sold.
+class HexagonBid {
+ // The priority is directly proportional to how restricted the insn is based
+ // on its flexibility to run on the available slots. So, the fewer slots it
+ // may run on, the higher its priority.
+ enum { MAX = 360360 }; // LCD of 1/2, 1/3, 1/4,... 1/15.
+ unsigned Bid;
+ // Start with no bid placed.
+ HexagonBid() : Bid(0){};
+ // Turn a slot mask B into a bid worth MAX divided by the number of bits set
+ // in B; an empty mask bids nothing.
+ HexagonBid(unsigned B) { Bid = B ? MAX / countPopulation(B) : 0; };
+ // Check if the insn priority is overflowed.
+ bool isSold() const { return (Bid >= MAX); };
+ // Add another bid to the running total and return this object.
+ HexagonBid &operator+=(const HexagonBid &B) {
+ Bid += B.Bid;
+ return *this;
+ };
+// Slot shuffling allocation.
+// Keeps one HexagonBid score per packet slot plus a mask of the slots whose
+// score has reached the sold threshold.
+class HexagonUnitAuction {
+ HexagonBid Scores[HEXAGON_PACKET_SIZE];
+ // Mask indicating which slot is unavailable.
+ unsigned isSold : HEXAGON_PACKET_SIZE;
+ // Start with every slot available.
+ HexagonUnitAuction() : isSold(0){};
+ // Allocate slots.
+ // B is the mask of slots an insn may use. Returns true if at least one of
+ // those slots is still unsold, false otherwise.
+ bool bid(unsigned B) {
+ // Exclude already auctioned slots from the bid.
+ unsigned b = B & ~isSold;
+ if (b) {
+ for (unsigned i = 0; i < HEXAGON_PACKET_SIZE; ++i)
+ if (b & (1 << i)) {
+ // Request candidate slots.
+ Scores[i] += HexagonBid(b);
+ // Mark slot i as sold once its accumulated score says so.
+ isSold |= Scores[i].isSold() << i;
+ }
+ return true;
+ ;
+ } else
+ // Error if the desired slots are already full.
+ return false;
+ };
+// Compute, store and return the weight of the insn for slot number s.
+// The weight is zero when the insn's unit mask does not include slot s, or
+// when s is too large for the weight to be represented.
+unsigned HexagonResource::setWeight(unsigned s) {
+ const unsigned SlotWeight = 8;
+ const unsigned MaskWeight = SlotWeight - 1;
+ const unsigned Units = getUnits();
+ const unsigned Bits = sizeof(unsigned) * 8;
+ // Shifting by the operand width or more is undefined behaviour, so reject
+ // any s whose slot shift would not fit. The second test also guarantees
+ // that Units is non-zero below, keeping countTrailingZeros() in range.
+ if (s >= Bits / SlotWeight || !((1u << s) & Units)) {
+ Weight = 0;
+ return (Weight);
+ }
+ // Calculate relative weight of the insn for the given slot, weighing it the
+ // heavier the more restrictive the insn is and the lowest the slots that the
+ // insn may be executed in.
+ Weight = (1u << (SlotWeight * s)) *
+ ((MaskWeight - countPopulation(Units)) << countTrailingZeros(Units));
+ return (Weight);
+// Construct a shuffler and put it into its initial state via reset().
+// NOTE(review): the member initializer list is not visible in this excerpt.
+HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI)
+ reset();
+// Reset to initial state: no insns in the packet, no bundle flags, no error.
+void HexagonShuffler::reset() {
+ Packet.clear();
+ BundleFlags = 0;
+ // shuffle() reports its result through getError(), so the error code must
+ // start from a defined value.
+ Error = SHUFFLE_SUCCESS;
+// Wrap the insn ID, its optional Extender, the slot mask S and the
+// solo-exception flag X in a HexagonInstr handle and add it to the end of the
+// packet.
+void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender,
+ unsigned S, bool X) {
+ HexagonInstr PI(ID, Extender, S, X);
+ Packet.push_back(PI);
+/// Check that the packet is legal and enforce relative insn order.
+/// Narrows the slot masks of the insns in the packet (branches, loads and
+/// stores are pinned to specific slots) and then verifies that no slot is
+/// over-subscribed. May reorder the packet while doing so.
+/// Returns true if the packet is legal; otherwise returns false and records
+/// the reason, retrievable through getError().
+bool HexagonShuffler::check() {
+ // Descriptive slot masks.
+ const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2,
+ slotThree = 0x8, slotFirstJump = 0x8, slotLastJump = 0x4,
+ slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1;
+ // Highest slots for branches and stores used to keep their original order.
+ unsigned slotJump = slotFirstJump;
+ unsigned slotLoadStore = slotFirstLoadStore;
+ // Number of branches, solo branches, indirect branches.
+ unsigned jumps = 0, jump1 = 0, jumpr = 0;
+ // Number of memory operations, loads, solo loads, stores, solo stores, single
+ // stores.
+ unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0;
+ // Number of duplex insns, solo insns.
+ unsigned duplex = 0, solo = 0;
+ // Number of insns restricting other insns in the packet to A and X types,
+ // which is neither A or X types.
+ unsigned onlyAX = 0, neitherAnorX = 0;
+ // Number of insns restricting other insns in slot #1 to A type.
+ unsigned onlyAin1 = 0;
+ // Number of insns restricting any insn in slot #1, except A2_nop.
+ unsigned onlyNo1 = 0;
+ unsigned xtypeFloat = 0;
+ // Number of insns preferring slot #3 and a handle to the last one seen.
+ unsigned pSlot3Cnt = 0;
+ iterator slot3ISJ = end();
+ // Collect information from the insns in the packet.
+ for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
+ MCInst const *ID = ISJ->getDesc();
+ if (HexagonMCInstrInfo::isSolo(MCII, *ID))
+ solo += !ISJ->isSoloException();
+ else if (HexagonMCInstrInfo::isSoloAX(MCII, *ID))
+ onlyAX += !ISJ->isSoloException();
+ else if (HexagonMCInstrInfo::isSoloAin1(MCII, *ID))
+ onlyAin1 += !ISJ->isSoloException();
+ if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32 &&
+ HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeXTYPE)
+ ++neitherAnorX;
+ if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID)) {
+ ++pSlot3Cnt;
+ slot3ISJ = ISJ;
+ }
+ switch (HexagonMCInstrInfo::getType(MCII, *ID)) {
+ case HexagonII::TypeXTYPE:
+ if (HexagonMCInstrInfo::isFloat(MCII, *ID))
+ ++xtypeFloat;
+ break;
+ case HexagonII::TypeJR:
+ ++jumpr;
+ // Fall-through.
+ case HexagonII::TypeJ:
+ ++jumps;
+ break;
+ case HexagonII::TypeLD:
+ ++loads;
+ ++memory;
+ if (ISJ->Core.getUnits() == slotSingleLoad)
+ ++load0;
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn())
+ ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
+ break;
+ case HexagonII::TypeST:
+ ++stores;
+ ++memory;
+ if (ISJ->Core.getUnits() == slotSingleStore)
+ ++store0;
+ break;
+ case HexagonII::TypeMEMOP:
+ ++loads;
+ ++stores;
+ ++store1;
+ ++memory;
+ break;
+ case HexagonII::TypeNV:
+ ++memory; // NV insns are memory-like.
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch())
+ ++jumps, ++jump1;
+ break;
+ case HexagonII::TypeCR:
+ // Legacy conditional branch predicated on a register.
+ case HexagonII::TypeSYSTEM:
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad())
+ ++loads;
+ break;
+ }
+ }
+ // Check if the packet is legal.
+ if ((load0 > 1 || store0 > 1) || (duplex > 1 || (duplex && memory)) ||
+ (solo && size() > 1) || (onlyAX && neitherAnorX > 1) ||
+ (onlyAX && xtypeFloat)) {
+ Error = SHUFFLE_ERROR_INVALID;
+ return false;
+ }
+ if (jump1 && jumps > 1) {
+ // Error if single branch with another branch.
+ Error = SHUFFLE_ERROR_BRANCHES;
+ return false;
+ }
+ // Modify packet accordingly.
+ // TODO: need to reserve slots #0 and #1 for duplex insns.
+ bool bOnlySlot3 = false;
+ for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
+ MCInst const *ID = ISJ->getDesc();
+ if (!ISJ->Core.getUnits()) {
+ // Error if insn may not be executed in any slot.
+ Error = SHUFFLE_ERROR_UNKNOWN;
+ return false;
+ }
+ // Exclude from slot #1 any insn but A2_nop.
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).getOpcode() != Hexagon::A2_nop)
+ if (onlyNo1)
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne);
+ // Exclude from slot #1 any insn but A-type.
+ if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32)
+ if (onlyAin1)
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne);
+ // Branches must keep the original order.
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch() ||
+ HexagonMCInstrInfo::getDesc(MCII, *ID).isCall())
+ if (jumps > 1) {
+ if (jumpr || slotJump < slotLastJump) {
+ // Error if indirect branch with another branch or
+ // no more slots available for branches.
+ Error = SHUFFLE_ERROR_BRANCHES;
+ return false;
+ }
+ // Pin the branch to the highest slot available to it.
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & slotJump);
+ // Update next highest slot available to branches.
+ slotJump >>= 1;
+ }
+ // A single load must use slot #0.
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad()) {
+ if (loads == 1 && loads == memory)
+ // Pin the load to slot #0.
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad);
+ }
+ // A single store must use slot #0.
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayStore()) {
+ if (!store0) {
+ if (stores == 1)
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore);
+ else if (stores > 1) {
+ if (slotLoadStore < slotLastLoadStore) {
+ // Error if no more slots available for stores.
+ Error = SHUFFLE_ERROR_STORES;
+ return false;
+ }
+ // Pin the store to the highest slot available to it.
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore);
+ // Update the next highest slot available to stores.
+ slotLoadStore >>= 1;
+ }
+ }
+ if (store1 && stores > 1) {
+ // Error if a single store with another store.
+ Error = SHUFFLE_ERROR_STORES;
+ return false;
+ }
+ }
+ // flag if an instruction can only be executed in slot 3
+ if (ISJ->Core.getUnits() == slotThree)
+ bOnlySlot3 = true;
+ if (!ISJ->Core.getUnits()) {
+ // Error if insn may not be executed in any slot.
+ Error = SHUFFLE_ERROR_NOSLOTS;
+ return false;
+ }
+ }
+ bool validateSlots = true;
+ if (bOnlySlot3 == false && pSlot3Cnt == 1 && slot3ISJ != end()) {
+ // save off slot mask of instruction marked with A_PREFER_SLOT3
+ // and then pin it to slot #3
+ unsigned saveUnits = slot3ISJ->Core.getUnits();
+ slot3ISJ->Core.setUnits(saveUnits & slotThree);
+ HexagonUnitAuction AuctionCore;
+ std::sort(begin(), end(), HexagonInstr::lessCore);
+ // see if things ok with that instruction being pinned to slot #3
+ bool bFail = false;
+ for (iterator I = begin(); I != end() && bFail != true; ++I)
+ if (!AuctionCore.bid(I->Core.getUnits()))
+ bFail = true;
+ // if yes, great, if not then restore original slot mask
+ if (!bFail)
+ validateSlots = false; // all good, no need to re-do auction
+ else
+ // The sort above may have moved the insn, so find it again by its
+ // slot #3 preference rather than through slot3ISJ.
+ for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
+ MCInst const *ID = ISJ->getDesc();
+ if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID))
+ ISJ->Core.setUnits(saveUnits);
+ }
+ }
+ // Check if any slot, core, is over-subscribed.
+ // Verify the core slot subscriptions.
+ if (validateSlots) {
+ HexagonUnitAuction AuctionCore;
+ std::sort(begin(), end(), HexagonInstr::lessCore);
+ for (iterator I = begin(); I != end(); ++I)
+ if (!AuctionCore.bid(I->Core.getUnits())) {
+ Error = SHUFFLE_ERROR_SLOTS;
+ return false;
+ }
+ }
+ Error = SHUFFLE_SUCCESS;
+ return true;
+// Reorder the insn handles in the packet so that they line up with the slots
+// they are to occupy. Returns false for an oversized packet; otherwise returns
+// true when no error code is recorded (see getError()).
+bool HexagonShuffler::shuffle() {
+ if (size() > HEXAGON_PACKET_SIZE) {
+ // Ignore a packet with more than what a packet can hold
+ // or with compound or duplex insns for now.
+ return false;
+ }
+ // Check and prepare packet.
+ if (size() > 1 && check())
+ // Reorder the handles for each slot.
+ // nSlot counts slots from the highest one down (see the setWeight
+ // argument below); emptySlots counts the slots no insn could fill.
+ for (unsigned nSlot = 0, emptySlots = 0; nSlot < HEXAGON_PACKET_SIZE;
+ ++nSlot) {
+ iterator ISJ, ISK;
+ unsigned slotSkip, slotWeight;
+ // Prioritize the handles considering their restrictions.
+ for (ISJ = ISK = Packet.begin(), slotSkip = slotWeight = 0;
+ ISK != Packet.end(); ++ISK, ++slotSkip)
+ if (slotSkip < nSlot - emptySlots)
+ // Note which handle to begin at.
+ ++ISJ;
+ else
+ // Calculate the weight of the slot.
+ slotWeight += ISK->Core.setWeight(HEXAGON_PACKET_SIZE - nSlot - 1);
+ if (slotWeight)
+ // Sort the packet, favoring source order,
+ // beginning after the previous slot.
+ std::sort(ISJ, Packet.end());
+ else
+ // Skip unused slot.
+ ++emptySlots;
+ }
+ // Debug output: one line per insn with its slot mask in hex and its opcode.
+ for (iterator ISJ = begin(); ISJ != end(); ++ISJ)
+ DEBUG(dbgs().write_hex(ISJ->Core.getUnits());
+ dbgs() << ':'
+ << HexagonMCInstrInfo::getDesc(MCII, *ISJ->getDesc())
+ .getOpcode();
+ dbgs() << '\n');
+ DEBUG(dbgs() << '\n');
+ return (!getError());
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
new file mode 100644
index 0000000..9218fd3
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -0,0 +1,139 @@
+//===----- HexagonShuffler.h - Instruction bundle shuffling ---------------===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This implements the shuffling of insns inside a bundle according to the
+// packet formation rules of the Hexagon ISA.
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstrInfo.h"
+using namespace llvm;
+namespace llvm {
+// Insn resources.
+// Holds the mask of slots an insn may execute in together with the weight
+// used to order insns while shuffling.
+class HexagonResource {
+ // Mask of the slots or units that may execute the insn and
+ // the weight or priority that the insn requires to be assigned a slot.
+ unsigned Slots, Weight;
+ // Build a resource from the slot mask s.
+ HexagonResource(unsigned s) { setUnits(s); };
+ // Store the slot mask s, truncated to the slots of a packet, and refresh
+ // the weight.
+ void setUnits(unsigned s) {
+ // Build the low-bit mask from an unsigned value: left-shifting a negative
+ // int (-1) is undefined behaviour before C++20.
+ Slots = s & ~(~0U << HEXAGON_PACKET_SIZE);
+ // NOTE(review): s is a slot mask here, while setWeight() treats its
+ // argument as a slot number - confirm this is intended.
+ setWeight(s);
+ };
+ // Compute, store and return the weight for slot number s.
+ unsigned setWeight(unsigned s);
+ unsigned getUnits() const { return (Slots); };
+ unsigned getWeight() const { return (Weight); };
+ // Check if the resources are in ascending slot order.
+ // The comparison is on the number of slots set in each mask.
+ static bool lessUnits(const HexagonResource &A, const HexagonResource &B) {
+ return (countPopulation(A.getUnits()) < countPopulation(B.getUnits()));
+ };
+ // Check if the resources are in ascending weight order.
+ static bool lessWeight(const HexagonResource &A, const HexagonResource &B) {
+ return (A.getWeight() < B.getWeight());
+ };
+// Handle to an insn used by the shuffling algorithm.
+// Pairs an insn (and its optional extender) with the slot resources it needs.
+class HexagonInstr {
+ friend class HexagonShuffler;
+ // The insn itself and its extender, if any.
+ MCInst const *ID;
+ MCInst const *Extender;
+ // Slot mask and weight of the insn.
+ HexagonResource Core;
+ // When set, check() does not count this insn towards the solo restrictions.
+ bool SoloException;
+ // s is the mask of slots the insn may execute in; x is the solo exception.
+ HexagonInstr(MCInst const *id, MCInst const *Extender, unsigned s,
+ bool x = false)
+ : ID(id), Extender(Extender), Core(s), SoloException(x){};
+ MCInst const *getDesc() const { return (ID); };
+ MCInst const *getExtender() const { return Extender; }
+ unsigned isSoloException() const { return (SoloException); };
+ // Check if the handles are in ascending order for shuffling purposes.
+ // Operands are swapped on purpose: the handle with the larger weight
+ // compares as "less" and therefore sorts first.
+ bool operator<(const HexagonInstr &B) const {
+ return (HexagonResource::lessWeight(B.Core, Core));
+ };
+ // Check if the handles are in ascending order by core slots.
+ static bool lessCore(const HexagonInstr &A, const HexagonInstr &B) {
+ return (HexagonResource::lessUnits(A.Core, B.Core));
+ };
+// Bundle shuffler.
+// Collects handles to the insns of a bundle, checks that they form a legal
+// packet and reorders them to match the slots they are to occupy.
+class HexagonShuffler {
+ typedef SmallVector<HexagonInstr, HEXAGON_PRESHUFFLE_PACKET_SIZE>
+ HexagonPacket;
+ // Insn handles in a bundle.
+ HexagonPacket Packet;
+ // Shuffling error code.
+ unsigned Error;
+ int64_t BundleFlags;
+ MCInstrInfo const &MCII;
+ MCSubtargetInfo const &STI;
+ typedef HexagonPacket::iterator iterator;
+ // Values for the shuffling error code.
+ enum {
+ SHUFFLE_SUCCESS = 0, ///< Successful operation.
+ SHUFFLE_ERROR_INVALID, ///< Invalid bundle.
+ SHUFFLE_ERROR_STORES, ///< No free slots for store insns.
+ SHUFFLE_ERROR_LOADS, ///< No free slots for load insns.
+ SHUFFLE_ERROR_BRANCHES, ///< No free slots for branch insns.
+ SHUFFLE_ERROR_NOSLOTS, ///< No free slots for other insns.
+ SHUFFLE_ERROR_SLOTS, ///< Over-subscribed slots.
+ SHUFFLE_ERROR_UNKNOWN ///< Unknown error.
+ };
+ explicit HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI);
+ // Reset to initial state.
+ void reset();
+ // Check if the bundle may be validly shuffled.
+ bool check();
+ // Reorder the insn handles in the bundle.
+ bool shuffle();
+ // Number of insn handles in the bundle and iteration over them.
+ unsigned size() const { return (Packet.size()); };
+ iterator begin() { return (Packet.begin()); };
+ iterator end() { return (Packet.end()); };
+ // Add insn handle to the bundle .
+ // S is the mask of slots the insn may use; X marks a solo exception.
+ void append(MCInst const *ID, MCInst const *Extender, unsigned S,
+ bool X = false);
+ // Return the error code for the last check or shuffling of the bundle.
+ void setError(unsigned Err) { Error = Err; };
+ unsigned getError() const { return (Error); };
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index 6c43d978..be6d1a8 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -39,7 +39,7 @@ void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
O << Op.getImm();
else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
- O << *Op.getExpr();
+ Op.getExpr()->print(O, &MAI);
@@ -53,7 +53,8 @@ void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << '#' << Op.getImm();
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << '#' << *Op.getExpr();
+ O << '#';
+ Op.getExpr()->print(O, &MAI);
@@ -75,7 +76,7 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
O << '&';
if (Disp.isExpr())
- O << *Disp.getExpr();
+ Disp.getExpr()->print(O, &MAI);
else {
assert(Disp.isImm() && "Expected immediate in displacement field");
O << Disp.getImm();
diff --git a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
index b8f3d02..a305b2d 100644
--- a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = MSP430Desc
parent = MSP430
-required_libraries = MC MSP430AsmPrinter MSP430Info
+required_libraries = MC MSP430AsmPrinter MSP430Info Support
add_to_library_groups = MSP430
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index df1aa1a..c26b308 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -12,12 +12,11 @@
#include "MSP430MCAsmInfo.h"
-#include "llvm/ADT/StringRef.h"
using namespace llvm;
void MSP430MCAsmInfo::anchor() { }
-MSP430MCAsmInfo::MSP430MCAsmInfo(StringRef TT) {
+MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT) {
PointerSize = CalleeSaveStackSlotSize = 2;
CommentString = ";";
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index 2c9532d..ff5b0b6 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -17,12 +17,12 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class StringRef;
+ class Triple;
class MSP430MCAsmInfo : public MCAsmInfoELF {
void anchor() override;
- explicit MSP430MCAsmInfo(StringRef TT);
+ explicit MSP430MCAsmInfo(const Triple &TT);
} // namespace llvm
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index a99c9a3..4342c10a 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -75,7 +75,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
O << MO.getImm();
case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
+ MO.getMBB()->getSymbol()->print(O, MAI);
case MachineOperand::MO_GlobalAddress: {
bool isMemOp = Modifier && !strcmp(Modifier, "mem");
@@ -92,7 +92,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
if (Offset)
O << '(' << Offset << '+';
- O << *getSymbol(MO.getGlobal());
+ getSymbol(MO.getGlobal())->print(O, MAI);
if (Offset)
O << ')';
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index b039778..54154a8 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -96,7 +96,7 @@ MCOperand MSP430MCInstLower::
LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
// FIXME: We would like an efficient form for this, so we don't have to do a
// lot of extra uniquing.
- const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
switch (MO.getTargetFlags()) {
default: llvm_unreachable("Unknown target flag on GV operand");
@@ -104,8 +104,8 @@ LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
if (!MO.isJTI() && MO.getOffset())
- Expr = MCBinaryExpr::CreateAdd(Expr,
- MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Expr = MCBinaryExpr::createAdd(Expr,
+ MCConstantExpr::create(MO.getOffset(), Ctx),
return MCOperand::createExpr(Expr);
@@ -130,7 +130,7 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::createImm(MO.getImm());
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
MO.getMBB()->getSymbol(), Ctx));
case MachineOperand::MO_GlobalAddress:
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index aade12b..9c054e5 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -43,7 +43,7 @@ class MCInstrInfo;
namespace {
class MipsAssemblerOptions {
- MipsAssemblerOptions(uint64_t Features_) :
+ MipsAssemblerOptions(const FeatureBitset &Features_) :
ATReg(1), Reorder(true), Macro(true), Features(Features_) {}
MipsAssemblerOptions(const MipsAssemblerOptions *Opts) {
@@ -70,8 +70,8 @@ public:
void setMacro() { Macro = true; }
void setNoMacro() { Macro = false; }
- uint64_t getFeatures() const { return Features; }
- void setFeatures(uint64_t Features_) { Features = Features_; }
+ const FeatureBitset &getFeatures() const { return Features; }
+ void setFeatures(const FeatureBitset &Features_) { Features = Features_; }
// Set of features that are either architecture features or referenced
// by them (e.g.: FeatureNaN2008 implied by FeatureMips32r6).
@@ -84,7 +84,7 @@ private:
unsigned ATReg;
bool Reorder;
bool Macro;
- uint64_t Features;
+ FeatureBitset Features;
@@ -247,6 +247,8 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetFpDirective();
bool parseSetPopDirective();
bool parseSetPushDirective();
+ bool parseSetSoftFloatDirective();
+ bool parseSetHardFloatDirective();
bool parseSetAssignment();
@@ -325,23 +327,23 @@ class MipsAsmParser : public MCTargetAsmParser {
- AssemblerOptions.back()->setFeatures(getAvailableFeatures());
+ AssemblerOptions.back()->setFeatures(STI.getFeatureBits());
void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
if (!(STI.getFeatureBits()[Feature])) {
+ AssemblerOptions.back()->setFeatures(STI.getFeatureBits());
- AssemblerOptions.back()->setFeatures(getAvailableFeatures());
void clearFeatureBits(uint64_t Feature, StringRef FeatureString) {
if (STI.getFeatureBits()[Feature]) {
+ AssemblerOptions.back()->setFeatures(STI.getFeatureBits());
- AssemblerOptions.back()->setFeatures(getAvailableFeatures());
@@ -367,11 +369,11 @@ public:
// Remember the initial assembler options. The user can not modify these.
- make_unique<MipsAssemblerOptions>(getAvailableFeatures()));
+ llvm::make_unique<MipsAssemblerOptions>(STI.getFeatureBits()));
// Create an assembler options environment for the user to modify.
- make_unique<MipsAssemblerOptions>(getAvailableFeatures()));
+ llvm::make_unique<MipsAssemblerOptions>(STI.getFeatureBits()));
@@ -1946,10 +1948,10 @@ void MipsAsmParser::expandLoadAddressSym(
unsigned RegNo = DstRegOp.getReg();
const MCSymbolRefExpr *Symbol = cast<MCSymbolRefExpr>(SymOp.getExpr());
const MCSymbolRefExpr *HiExpr =
- MCSymbolRefExpr::Create(Symbol->getSymbol().getName(),
+ MCSymbolRefExpr::create(Symbol->getSymbol().getName(),
MCSymbolRefExpr::VK_Mips_ABS_HI, getContext());
const MCSymbolRefExpr *LoExpr =
- MCSymbolRefExpr::Create(Symbol->getSymbol().getName(),
+ MCSymbolRefExpr::create(Symbol->getSymbol().getName(),
MCSymbolRefExpr::VK_Mips_ABS_LO, getContext());
if (!Is32BitSym) {
// If it's a 64-bit architecture, expand to:
@@ -1960,10 +1962,10 @@ void MipsAsmParser::expandLoadAddressSym(
// dsll d,d,16
// ori d,d,lo16(sym)
const MCSymbolRefExpr *HighestExpr =
- MCSymbolRefExpr::Create(Symbol->getSymbol().getName(),
+ MCSymbolRefExpr::create(Symbol->getSymbol().getName(),
MCSymbolRefExpr::VK_Mips_HIGHEST, getContext());
const MCSymbolRefExpr *HigherExpr =
- MCSymbolRefExpr::Create(Symbol->getSymbol().getName(),
+ MCSymbolRefExpr::create(Symbol->getSymbol().getName(),
MCSymbolRefExpr::VK_Mips_HIGHER, getContext());
@@ -2102,7 +2104,7 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
else {
if (ExprOffset->getKind() == MCExpr::SymbolRef) {
SR = static_cast<const MCSymbolRefExpr *>(ExprOffset);
- const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create(
+ const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::create(
SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_HI,
@@ -2133,7 +2135,7 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
else {
if (ExprOffset->getKind() == MCExpr::SymbolRef) {
- const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create(
+ const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create(
SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_LO,
@@ -2505,7 +2507,7 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
MCSymbol *Sym = getContext().getOrCreateSymbol("$" + Identifier);
// Otherwise create a symbol reference.
const MCExpr *Res =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
Operands.push_back(MipsOperand::CreateImm(Res, S, E, *this));
return false;
@@ -2565,14 +2567,14 @@ const MCExpr *MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr,
report_fatal_error("unsupported reloc value");
- return MCConstantExpr::Create(Val, getContext());
+ return MCConstantExpr::create(Val, getContext());
if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(Expr)) {
// It's a symbol, create a symbolic expression from the symbol.
StringRef Symbol = MSRE->getSymbol().getName();
MCSymbolRefExpr::VariantKind VK = getVariantKind(RelocStr);
- Res = MCSymbolRefExpr::Create(Symbol, VK, getContext());
+ Res = MCSymbolRefExpr::create(Symbol, VK, getContext());
return Res;
@@ -2581,17 +2583,17 @@ const MCExpr *MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr,
// Try to create target expression.
if (MipsMCExpr::isSupportedBinaryExpr(VK, BE))
- return MipsMCExpr::Create(VK, Expr, getContext());
+ return MipsMCExpr::create(VK, Expr, getContext());
const MCExpr *LExp = evaluateRelocExpr(BE->getLHS(), RelocStr);
const MCExpr *RExp = evaluateRelocExpr(BE->getRHS(), RelocStr);
- Res = MCBinaryExpr::Create(BE->getOpcode(), LExp, RExp, getContext());
+ Res = MCBinaryExpr::create(BE->getOpcode(), LExp, RExp, getContext());
return Res;
if (const MCUnaryExpr *UN = dyn_cast<MCUnaryExpr>(Expr)) {
const MCExpr *UnExp = evaluateRelocExpr(UN->getSubExpr(), RelocStr);
- Res = MCUnaryExpr::Create(UN->getOpcode(), UnExp, getContext());
+ Res = MCUnaryExpr::create(UN->getOpcode(), UnExp, getContext());
return Res;
// Just return the original expression.
@@ -2779,7 +2781,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
Parser.Lex(); // Eat the ')' token.
if (!IdVal)
- IdVal = MCConstantExpr::Create(0, getContext());
+ IdVal = MCConstantExpr::create(0, getContext());
// Replace the register operand with the memory operand.
std::unique_ptr<MipsOperand> op(
@@ -2790,10 +2792,10 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
// Add the memory operand.
if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(IdVal)) {
int64_t Imm;
- if (IdVal->EvaluateAsAbsolute(Imm))
- IdVal = MCConstantExpr::Create(Imm, getContext());
+ if (IdVal->evaluateAsAbsolute(Imm))
+ IdVal = MCConstantExpr::create(Imm, getContext());
else if (BE->getLHS()->getKind() != MCExpr::SymbolRef)
- IdVal = MCBinaryExpr::Create(BE->getOpcode(), BE->getRHS(), BE->getLHS(),
+ IdVal = MCBinaryExpr::create(BE->getOpcode(), BE->getRHS(), BE->getLHS(),
@@ -3010,7 +3012,7 @@ MipsAsmParser::parseInvNum(OperandVector &Operands) {
int64_t Val = MCE->getValue();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- MCConstantExpr::Create(0 - Val, getContext()), S, E, *this));
+ MCConstantExpr::create(0 - Val, getContext()), S, E, *this));
return MatchOperand_Success;
@@ -3034,7 +3036,7 @@ MipsAsmParser::parseLSAImm(OperandVector &Operands) {
return MatchOperand_ParseFail;
int64_t Val;
- if (!Expr->EvaluateAsAbsolute(Val)) {
+ if (!Expr->evaluateAsAbsolute(Val)) {
Error(S, "expected immediate value");
return MatchOperand_ParseFail;
@@ -3601,7 +3603,9 @@ bool MipsAsmParser::parseSetPopDirective() {
return reportParseError(Loc, ".set pop with no .set push");
- setAvailableFeatures(AssemblerOptions.back()->getFeatures());
+ setAvailableFeatures(
+ ComputeAvailableFeatures(AssemblerOptions.back()->getFeatures()));
+ STI.setFeatureBits(AssemblerOptions.back()->getFeatures());
return false;
@@ -3621,6 +3625,28 @@ bool MipsAsmParser::parseSetPushDirective() {
return false;
+bool MipsAsmParser::parseSetSoftFloatDirective() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return reportParseError("unexpected token, expected end of statement");
+ setFeatureBits(Mips::FeatureSoftFloat, "soft-float");
+ getTargetStreamer().emitDirectiveSetSoftFloat();
+ return false;
+bool MipsAsmParser::parseSetHardFloatDirective() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return reportParseError("unexpected token, expected end of statement");
+ clearFeatureBits(Mips::FeatureSoftFloat, "soft-float");
+ getTargetStreamer().emitDirectiveSetHardFloat();
+ return false;
bool MipsAsmParser::parseSetAssignment() {
StringRef Name;
const MCExpr *Value;
@@ -3649,7 +3675,9 @@ bool MipsAsmParser::parseSetMips0Directive() {
return reportParseError("unexpected token, expected end of statement");
// Reset assembler options to their initial values.
- setAvailableFeatures(AssemblerOptions.front()->getFeatures());
+ setAvailableFeatures(
+ ComputeAvailableFeatures(AssemblerOptions.front()->getFeatures()));
+ STI.setFeatureBits(AssemblerOptions.front()->getFeatures());
@@ -3985,6 +4013,10 @@ bool MipsAsmParser::parseDirectiveSet() {
return parseSetMsaDirective();
} else if (Tok.getString() == "nomsa") {
return parseSetNoMsaDirective();
+ } else if (Tok.getString() == "softfloat") {
+ return parseSetSoftFloatDirective();
+ } else if (Tok.getString() == "hardfloat") {
+ return parseSetHardFloatDirective();
} else {
// It is just an identifier, look for an assignment.
@@ -4286,7 +4318,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
reportParseError("expected number after comma");
return false;
- if (!DummyNumber->EvaluateAsAbsolute(DummyNumberVal)) {
+ if (!DummyNumber->evaluateAsAbsolute(DummyNumberVal)) {
reportParseError("expected an absolute expression after comma");
return false;
@@ -4366,7 +4398,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
- if (!FrameSize->EvaluateAsAbsolute(FrameSizeVal)) {
+ if (!FrameSize->evaluateAsAbsolute(FrameSizeVal)) {
reportParseError("frame size not an absolute expression");
return false;
@@ -4427,7 +4459,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
- if (!BitMask->EvaluateAsAbsolute(BitMaskVal)) {
+ if (!BitMask->evaluateAsAbsolute(BitMaskVal)) {
reportParseError("bitmask not an absolute expression");
return false;
@@ -4448,7 +4480,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
- if (!FrameOffset->EvaluateAsAbsolute(FrameOffsetVal)) {
+ if (!FrameOffset->evaluateAsAbsolute(FrameOffsetVal)) {
reportParseError("frame offset not an absolute expression");
return false;
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index eb97c93..c8629b5 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -47,6 +47,8 @@ public:
bool isGP64() const { return STI.getFeatureBits()[Mips::FeatureGP64Bit]; }
+ bool hasCnMips() const { return STI.getFeatureBits()[Mips::FeatureCnMips]; }
bool hasCOP3() const {
// Only present in MIPS-I and MIPS-II
return !hasMips32() && !hasMips3();
@@ -889,6 +891,16 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
+ if (hasCnMips()) {
+ DEBUG(dbgs() << "Trying CnMips table (32-bit opcodes):\n");
+ Result = decodeInstruction(DecoderTableCnMips32, Instr, Insn,
+ Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+ }
if (isGP64()) {
DEBUG(dbgs() << "Trying Mips64 (GPR64) table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableMips6432, Instr, Insn,
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index e80a47b..a5637b1 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -122,7 +122,8 @@ void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
+static void printExpr(const MCExpr *Expr, const MCAsmInfo *MAI,
+ raw_ostream &OS) {
int Offset = 0;
const MCSymbolRefExpr *SRE;
@@ -132,7 +133,7 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
assert(SRE && CE && "Binary expression must be sym+const.");
Offset = CE->getValue();
} else if (const MipsMCExpr *ME = dyn_cast<MipsMCExpr>(Expr)) {
- ME->print(OS);
+ ME->print(OS, MAI);
} else
SRE = cast<MCSymbolRefExpr>(Expr);
@@ -170,7 +171,7 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
case MCSymbolRefExpr::VK_Mips_PCREL_LO16: OS << "%pcrel_lo("; break;
- OS << SRE->getSymbol();
+ SRE->getSymbol().print(OS, MAI);
if (Offset) {
if (Offset > 0)
@@ -199,7 +200,7 @@ void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
assert(Op.isExpr() && "unknown operand kind in printOperand");
- printExpr(Op.getExpr(), O);
+ printExpr(Op.getExpr(), &MAI, O);
void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index b1f7c2f..bf8f7d1 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -99,6 +99,10 @@ unsigned MipsABIInfo::GetFramePtr() const {
return ArePtrs64bit() ? Mips::FP_64 : Mips::FP;
+unsigned MipsABIInfo::GetBasePtr() const {
+ return ArePtrs64bit() ? Mips::S7_64 : Mips::S7;
unsigned MipsABIInfo::GetNullPtr() const {
return ArePtrs64bit() ? Mips::ZERO_64 : Mips::ZERO;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
index 9a6ba94..d20dc90 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
@@ -65,6 +65,7 @@ public:
unsigned GetStackPtr() const;
unsigned GetFramePtr() const;
+ unsigned GetBasePtr() const;
unsigned GetNullPtr() const;
unsigned GetPtrAdduOp() const;
unsigned GetPtrAddiuOp() const;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 8d9e3e3..982a7f5 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -12,10 +12,10 @@
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include <list>
@@ -46,7 +46,7 @@ struct MipsRelocationEntry {
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
- bool needsRelocateWithSymbol(const MCSymbolData &SD,
+ bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
virtual void sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs) override;
@@ -65,181 +65,134 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
// determine the type of the relocation
- unsigned Type = (unsigned)ELF::R_MIPS_NONE;
unsigned Kind = (unsigned)Fixup.getKind();
switch (Kind) {
- default:
- llvm_unreachable("invalid fixup kind!");
case Mips::fixup_Mips_32:
case FK_Data_4:
- Type = ELF::R_MIPS_32;
- break;
+ return IsPCRel ? ELF::R_MIPS_PC32 : ELF::R_MIPS_32;
case Mips::fixup_Mips_64:
case FK_Data_8:
- Type = ELF::R_MIPS_64;
- break;
+ return ELF::R_MIPS_64;
case FK_GPRel_4:
if (isN64()) {
+ unsigned Type = (unsigned)ELF::R_MIPS_NONE;
Type = setRType((unsigned)ELF::R_MIPS_GPREL32, Type);
Type = setRType2((unsigned)ELF::R_MIPS_64, Type);
Type = setRType3((unsigned)ELF::R_MIPS_NONE, Type);
+ return Type;
- else
- Type = ELF::R_MIPS_GPREL32;
- break;
+ return ELF::R_MIPS_GPREL32;
case Mips::fixup_Mips_GPREL16:
- Type = ELF::R_MIPS_GPREL16;
- break;
+ return ELF::R_MIPS_GPREL16;
case Mips::fixup_Mips_26:
- Type = ELF::R_MIPS_26;
- break;
+ return ELF::R_MIPS_26;
case Mips::fixup_Mips_CALL16:
- Type = ELF::R_MIPS_CALL16;
- break;
+ return ELF::R_MIPS_CALL16;
case Mips::fixup_Mips_GOT_Global:
case Mips::fixup_Mips_GOT_Local:
- Type = ELF::R_MIPS_GOT16;
- break;
+ return ELF::R_MIPS_GOT16;
case Mips::fixup_Mips_HI16:
- Type = ELF::R_MIPS_HI16;
- break;
+ return ELF::R_MIPS_HI16;
case Mips::fixup_Mips_LO16:
- Type = ELF::R_MIPS_LO16;
- break;
+ return ELF::R_MIPS_LO16;
case Mips::fixup_Mips_TLSGD:
- Type = ELF::R_MIPS_TLS_GD;
- break;
+ return ELF::R_MIPS_TLS_GD;
case Mips::fixup_Mips_GOTTPREL:
- break;
case Mips::fixup_Mips_TPREL_HI:
- break;
+ return ELF::R_MIPS_TLS_TPREL_HI16;
case Mips::fixup_Mips_TPREL_LO:
- break;
+ return ELF::R_MIPS_TLS_TPREL_LO16;
case Mips::fixup_Mips_TLSLDM:
- break;
+ return ELF::R_MIPS_TLS_LDM;
case Mips::fixup_Mips_DTPREL_HI:
- break;
case Mips::fixup_Mips_DTPREL_LO:
- break;
case Mips::fixup_Mips_Branch_PCRel:
case Mips::fixup_Mips_PC16:
- Type = ELF::R_MIPS_PC16;
- break;
+ return ELF::R_MIPS_PC16;
case Mips::fixup_Mips_GOT_PAGE:
- break;
+ return ELF::R_MIPS_GOT_PAGE;
case Mips::fixup_Mips_GOT_OFST:
- break;
+ return ELF::R_MIPS_GOT_OFST;
case Mips::fixup_Mips_GOT_DISP:
- break;
- case Mips::fixup_Mips_GPOFF_HI:
+ return ELF::R_MIPS_GOT_DISP;
+ case Mips::fixup_Mips_GPOFF_HI: {
+ unsigned Type = (unsigned)ELF::R_MIPS_NONE;
Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type);
Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type);
Type = setRType3((unsigned)ELF::R_MIPS_HI16, Type);
- break;
- case Mips::fixup_Mips_GPOFF_LO:
+ return Type;
+ }
+ case Mips::fixup_Mips_GPOFF_LO: {
+ unsigned Type = (unsigned)ELF::R_MIPS_NONE;
Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type);
Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type);
Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type);
- break;
+ return Type;
+ }
case Mips::fixup_Mips_HIGHER:
- break;
+ return ELF::R_MIPS_HIGHER;
case Mips::fixup_Mips_HIGHEST:
- break;
case Mips::fixup_Mips_GOT_HI16:
- Type = ELF::R_MIPS_GOT_HI16;
- break;
+ return ELF::R_MIPS_GOT_HI16;
case Mips::fixup_Mips_GOT_LO16:
- Type = ELF::R_MIPS_GOT_LO16;
- break;
+ return ELF::R_MIPS_GOT_LO16;
case Mips::fixup_Mips_CALL_HI16:
- Type = ELF::R_MIPS_CALL_HI16;
- break;
+ return ELF::R_MIPS_CALL_HI16;
case Mips::fixup_Mips_CALL_LO16:
- Type = ELF::R_MIPS_CALL_LO16;
- break;
+ return ELF::R_MIPS_CALL_LO16;
case Mips::fixup_MICROMIPS_26_S1:
- Type = ELF::R_MICROMIPS_26_S1;
- break;
+ return ELF::R_MICROMIPS_26_S1;
case Mips::fixup_MICROMIPS_HI16:
- break;
+ return ELF::R_MICROMIPS_HI16;
case Mips::fixup_MICROMIPS_LO16:
- break;
+ return ELF::R_MICROMIPS_LO16;
case Mips::fixup_MICROMIPS_GOT16:
- break;
+ return ELF::R_MICROMIPS_GOT16;
case Mips::fixup_MICROMIPS_PC7_S1:
- break;
+ return ELF::R_MICROMIPS_PC7_S1;
case Mips::fixup_MICROMIPS_PC10_S1:
- Type = ELF::R_MICROMIPS_PC10_S1;
- break;
+ return ELF::R_MICROMIPS_PC10_S1;
case Mips::fixup_MICROMIPS_PC16_S1:
- Type = ELF::R_MICROMIPS_PC16_S1;
- break;
+ return ELF::R_MICROMIPS_PC16_S1;
case Mips::fixup_MICROMIPS_CALL16:
- break;
case Mips::fixup_MICROMIPS_GOT_DISP:
- break;
case Mips::fixup_MICROMIPS_GOT_PAGE:
- break;
case Mips::fixup_MICROMIPS_GOT_OFST:
- break;
case Mips::fixup_MICROMIPS_TLS_GD:
- break;
case Mips::fixup_MICROMIPS_TLS_LDM:
- break;
case Mips::fixup_MICROMIPS_TLS_DTPREL_HI16:
- break;
case Mips::fixup_MICROMIPS_TLS_DTPREL_LO16:
- break;
case Mips::fixup_MICROMIPS_TLS_TPREL_HI16:
- break;
case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
- break;
case Mips::fixup_MIPS_PC19_S2:
- Type = ELF::R_MIPS_PC19_S2;
- break;
+ return ELF::R_MIPS_PC19_S2;
case Mips::fixup_MIPS_PC18_S3:
- Type = ELF::R_MIPS_PC18_S3;
- break;
+ return ELF::R_MIPS_PC18_S3;
case Mips::fixup_MIPS_PC21_S2:
- Type = ELF::R_MIPS_PC21_S2;
- break;
+ return ELF::R_MIPS_PC21_S2;
case Mips::fixup_MIPS_PC26_S2:
- Type = ELF::R_MIPS_PC26_S2;
- break;
+ return ELF::R_MIPS_PC26_S2;
case Mips::fixup_MIPS_PCHI16:
- Type = ELF::R_MIPS_PCHI16;
- break;
+ return ELF::R_MIPS_PCHI16;
case Mips::fixup_MIPS_PCLO16:
- Type = ELF::R_MIPS_PCLO16;
- break;
+ return ELF::R_MIPS_PCLO16;
- return Type;
+ llvm_unreachable("invalid fixup kind!");
// Sort entries by SortOffset in descending order.
@@ -271,9 +224,7 @@ static unsigned getMatchingLoType(const MCAssembler &Asm,
if (Type == ELF::R_MIPS16_HI16)
return ELF::R_MIPS16_LO16;
- const MCSymbolData &SD = Asm.getSymbolData(*Reloc.Symbol);
- if (MCELF::GetBinding(SD) != ELF::STB_LOCAL)
+ if (Reloc.Symbol->getBinding() != ELF::STB_LOCAL)
return ELF::R_MIPS_NONE;
if (Type == ELF::R_MIPS_GOT16)
@@ -405,9 +356,8 @@ void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
Relocs[I] = MipsRelocs[I].R;
-MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
- unsigned Type) const {
+bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
+ unsigned Type) const {
// FIXME: This is extremely conservative. This really needs to use a
// whitelist with a clear explanation for why each relocation needs to
// point to the symbol, not to the section.
@@ -434,7 +384,7 @@ MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
return true;
case ELF::R_MIPS_32:
- if (MCELF::getOther(SD) & (ELF::STO_MIPS_MICROMIPS >> 2))
+ if (cast<MCSymbolELF>(Sym).getOther() & ELF::STO_MIPS_MICROMIPS)
return true;
// fallthrough
case ELF::R_MIPS_26:
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index d2b5183..b45d9cf 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -9,8 +9,8 @@
#include "MipsELFStreamer.h"
#include "MipsTargetStreamer.h"
-#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/ELF.h"
using namespace llvm;
@@ -41,12 +41,10 @@ void MipsELFStreamer::createPendingLabelRelocs() {
// FIXME: Also mark labels when in MIPS16 mode.
if (ELFTargetStreamer->isMicroMipsEnabled()) {
- for (auto Label : Labels) {
- MCSymbolData &Data = getOrCreateSymbolData(Label);
- // The "other" values are stored in the last 6 bits of the second byte.
- // The traditional defines for STO values assume the full byte and thus
- // the shift to pack it.
- MCELF::setOther(Data, ELF::STO_MIPS_MICROMIPS >> 2);
+ for (auto *L : Labels) {
+ auto *Label = cast<MCSymbolELF>(L);
+ getAssembler().registerSymbol(*Label);
+ Label->setOther(ELF::STO_MIPS_MICROMIPS);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index e2bd5a8..4d55458 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -18,8 +18,7 @@ using namespace llvm;
void MipsMCAsmInfo::anchor() { }
-MipsMCAsmInfo::MipsMCAsmInfo(StringRef TT) {
- Triple TheTriple(TT);
+MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
if ((TheTriple.getArch() == Triple::mips) ||
(TheTriple.getArch() == Triple::mips64))
IsLittleEndian = false;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index 59ff1c4..5d23fcb 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -17,12 +17,12 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class StringRef;
+ class Triple;
class MipsMCAsmInfo : public MCAsmInfoELF {
void anchor() override;
- explicit MipsMCAsmInfo(StringRef TT);
+ explicit MipsMCAsmInfo(const Triple &TheTriple);
} // namespace llvm
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index a0d9e15..93925bf 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -467,7 +467,7 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
int64_t Res;
- if (Expr->EvaluateAsAbsolute(Res))
+ if (Expr->evaluateAsAbsolute(Res))
return Res;
MCExpr::ExprKind Kind = Expr->getKind();
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 74490f3..c85fc48 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -41,7 +41,7 @@ bool MipsMCExpr::isSupportedBinaryExpr(MCSymbolRefExpr::VariantKind VK,
const MipsMCExpr*
-MipsMCExpr::Create(MCSymbolRefExpr::VariantKind VK, const MCExpr *Expr,
+MipsMCExpr::create(MCSymbolRefExpr::VariantKind VK, const MCExpr *Expr,
MCContext &Ctx) {
VariantKind Kind;
switch (VK) {
@@ -64,7 +64,7 @@ MipsMCExpr::Create(MCSymbolRefExpr::VariantKind VK, const MCExpr *Expr,
return new (Ctx) MipsMCExpr(Kind, Expr);
-void MipsMCExpr::PrintImpl(raw_ostream &OS) const {
+void MipsMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
switch (Kind) {
default: llvm_unreachable("Invalid kind!");
case VK_Mips_LO: OS << "%lo"; break;
@@ -74,15 +74,15 @@ void MipsMCExpr::PrintImpl(raw_ostream &OS) const {
OS << '(';
- Expr->print(OS);
+ Expr->print(OS, MAI);
OS << ')';
-MipsMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+MipsMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const {
- return getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup);
+ return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup);
void MipsMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index ee11461..fd2ed17 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -37,7 +37,7 @@ public:
static bool isSupportedBinaryExpr(MCSymbolRefExpr::VariantKind VK,
const MCBinaryExpr *BE);
- static const MipsMCExpr *Create(MCSymbolRefExpr::VariantKind VK,
+ static const MipsMCExpr *create(MCSymbolRefExpr::VariantKind VK,
const MCExpr *Expr, MCContext &Ctx);
/// getOpcode - Get the kind of this expression.
@@ -46,13 +46,13 @@ public:
/// getSubExpr - Get the child of this expression.
const MCExpr *getSubExpr() const { return Expr; }
- void PrintImpl(raw_ostream &OS) const override;
- bool EvaluateAsRelocatableImpl(MCValue &Res,
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *FindAssociatedSection() const override {
- return getSubExpr()->FindAssociatedSection();
+ MCSection *findAssociatedSection() const override {
+ return getSubExpr()->findAssociatedSection();
// There are no TLS MipsMCExprs at the moment.
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 2e3179a..54d8863 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -75,7 +75,8 @@ static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
-static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
+static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI,
+ const Triple &TT) {
MCAsmInfo *MAI = new MipsMCAsmInfo(TT);
unsigned SP = MRI.getDwarfRegNum(Mips::SP, true);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 8e6f047..a051f4c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -17,10 +17,9 @@
#include "MipsTargetObjectFile.h"
#include "MipsTargetStreamer.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -81,6 +80,12 @@ void MipsTargetStreamer::emitDirectiveSetMips64R5() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips64R6() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetPop() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetPush() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetSoftFloat() {
+ forbidModuleDirective();
+void MipsTargetStreamer::emitDirectiveSetHardFloat() {
+ forbidModuleDirective();
void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {}
@@ -308,6 +313,16 @@ void MipsTargetAsmStreamer::emitDirectiveSetPush() {
+void MipsTargetAsmStreamer::emitDirectiveSetSoftFloat() {
+ OS << "\t.set\tsoftfloat\n";
+ MipsTargetStreamer::emitDirectiveSetSoftFloat();
+void MipsTargetAsmStreamer::emitDirectiveSetHardFloat() {
+ OS << "\t.set\thardfloat\n";
+ MipsTargetStreamer::emitDirectiveSetHardFloat();
// Print a 32 bit hex number with all numbers.
static void printHex32(unsigned Value, raw_ostream &OS) {
OS << "0x";
@@ -358,7 +373,6 @@ void MipsTargetAsmStreamer::emitDirectiveModuleFP(
MipsABIFlagsSection::FpABIKind Value, bool Is32BitABI) {
MipsTargetStreamer::emitDirectiveModuleFP(Value, Is32BitABI);
- StringRef ModuleValue;
OS << "\t.module\tfp=";
OS << ABIFlagsSection.getFpABIString(Value) << "\n";
@@ -367,7 +381,6 @@ void MipsTargetAsmStreamer::emitDirectiveSetFp(
MipsABIFlagsSection::FpABIKind Value) {
- StringRef ModuleValue;
OS << "\t.set\tfp=";
OS << ABIFlagsSection.getFpABIString(Value) << "\n";
@@ -440,18 +453,16 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
-void MipsTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
+void MipsTargetELFStreamer::emitLabel(MCSymbol *S) {
+ auto *Symbol = cast<MCSymbolELF>(S);
if (!isMicroMipsEnabled())
- MCSymbolData &Data = getStreamer().getOrCreateSymbolData(Symbol);
- uint8_t Type = MCELF::GetType(Data);
+ getStreamer().getAssembler().registerSymbol(*Symbol);
+ uint8_t Type = Symbol->getType();
if (Type != ELF::STT_FUNC)
- // The "other" values are stored in the last 6 bits of the second byte
- // The traditional defines for STO values assume the full byte and thus
- // the shift to pack it.
- MCELF::setOther(Data, ELF::STO_MIPS_MICROMIPS >> 2);
+ Symbol->setOther(ELF::STO_MIPS_MICROMIPS);
void MipsTargetELFStreamer::finish() {
@@ -505,23 +516,18 @@ void MipsTargetELFStreamer::finish() {
-void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol,
- const MCExpr *Value) {
+void MipsTargetELFStreamer::emitAssignment(MCSymbol *S, const MCExpr *Value) {
+ auto *Symbol = cast<MCSymbolELF>(S);
// If the RHS is a microMIPS symbol, then mark Symbol as microMIPS.
if (Value->getKind() != MCExpr::SymbolRef)
- const MCSymbol &RhsSym =
- static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
- MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym);
+ const auto &RhsSym = cast<MCSymbolELF>(
+ static_cast<const MCSymbolRefExpr *>(Value)->getSymbol());
- if (!(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2)))
+ if (!(RhsSym.getOther() & ELF::STO_MIPS_MICROMIPS))
- MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol);
- // The "other" values are stored in the last 6 bits of the second byte.
- // The traditional defines for STO values assume the full byte and thus
- // the shift to pack it.
- MCELF::setOther(SymbolData, ELF::STO_MIPS_MICROMIPS >> 2);
+ Symbol->setOther(ELF::STO_MIPS_MICROMIPS);
MCELFStreamer &MipsTargetELFStreamer::getStreamer() {
@@ -568,7 +574,7 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
const MCSymbolRefExpr *ExprRef =
- MCSymbolRefExpr::Create(Name, MCSymbolRefExpr::VK_None, Context);
+ MCSymbolRefExpr::create(Name, MCSymbolRefExpr::VK_None, Context);
@@ -693,12 +699,12 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
StringRef SymName("_gp_disp");
MCAssembler &MCA = getStreamer().getAssembler();
MCSymbol *GP_Disp = MCA.getContext().getOrCreateSymbol(SymName);
- MCA.getOrCreateSymbolData(*GP_Disp);
+ MCA.registerSymbol(*GP_Disp);
MCInst TmpInst;
- const MCSymbolRefExpr *HiSym = MCSymbolRefExpr::Create(
+ const MCSymbolRefExpr *HiSym = MCSymbolRefExpr::create(
"_gp_disp", MCSymbolRefExpr::VK_Mips_ABS_HI, MCA.getContext());
getStreamer().EmitInstruction(TmpInst, STI);
@@ -708,7 +714,7 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
- const MCSymbolRefExpr *LoSym = MCSymbolRefExpr::Create(
+ const MCSymbolRefExpr *LoSym = MCSymbolRefExpr::create(
"_gp_disp", MCSymbolRefExpr::VK_Mips_ABS_LO, MCA.getContext());
getStreamer().EmitInstruction(TmpInst, STI);
@@ -752,9 +758,9 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
getStreamer().EmitInstruction(Inst, STI);
- const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create(
+ const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::create(
&Sym, MCSymbolRefExpr::VK_Mips_GPOFF_HI, MCA.getContext());
- const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create(
+ const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create(
&Sym, MCSymbolRefExpr::VK_Mips_GPOFF_LO, MCA.getContext());
// lui $gp, %hi(%neg(%gp_rel(funcSym)))
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 272933f..8a27874 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -305,8 +305,9 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
def LONG_BRANCH_DADDiu : PseudoSE<(outs GPR64Opnd:$dst),
(ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>;
-// Cavium Octeon cmMIPS instructions
-let EncodingPredicates = []<Predicate>, // FIXME: The lack of HasStdEnc is probably a bug
+// Cavium Octeon cnMIPS instructions
+let DecoderNamespace = "CnMips",
+ EncodingPredicates = []<Predicate>, // FIXME: The lack of HasStdEnc is probably a bug
AdditionalPredicates = [HasCnMips] in {
class Count1s<string opstr, RegisterOperand RO>:
@@ -353,6 +354,10 @@ class CBranchBitNum<string opstr, DAGOperand opnd, PatFrag cond_op,
let Defs = [AT];
+class MFC2OP<string asmstr, RegisterOperand RO> :
+ InstSE<(outs RO:$rt, uimm16:$imm16), (ins),
+ !strconcat(asmstr, "\t$rt, $imm16"), [], NoItinerary, FrmFR>;
// Unsigned Byte Add
let Pattern = [(set GPR64Opnd:$rd,
(and (add GPR64Opnd:$rs, GPR64Opnd:$rt), 255))] in
@@ -415,6 +420,9 @@ let Defs = [MPL1, MPL2, P0, P1, P2] in
def VMULU : ArithLogicR<"vmulu", GPR64Opnd, 0, II_DMUL>,
ADD_FM<0x1c, 0x0f>;
+// Move between CPU and coprocessor registers
+def DMFC2_OCTEON : MFC2OP<"dmfc2", GPR64Opnd>, MFC2OP_FM<0x12, 1>;
+def DMTC2_OCTEON : MFC2OP<"dmtc2", GPR64Opnd>, MFC2OP_FM<0x12, 5>;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index a3995b8c..f84666b 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -41,7 +41,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -594,11 +594,11 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
+ MO.getMBB()->getSymbol()->print(O, MAI);
case MachineOperand::MO_GlobalAddress:
- O << *getSymbol(MO.getGlobal());
+ getSymbol(MO.getGlobal())->print(O, MAI);
case MachineOperand::MO_BlockAddress: {
@@ -778,7 +778,7 @@ void MipsAsmPrinter::EmitJal(const MCSubtargetInfo &STI, MCSymbol *Symbol) {
MCInst I;
- MCOperand::createExpr(MCSymbolRefExpr::Create(Symbol, OutContext)));
+ MCOperand::createExpr(MCSymbolRefExpr::create(Symbol, OutContext)));
OutStreamer->EmitInstruction(I, STI);
@@ -983,7 +983,8 @@ void MipsAsmPrinter::EmitFPCallStub(
// __call_stub_fp_xxxx:
std::string x = "__call_stub_fp_" + std::string(Symbol);
- MCSymbol *Stub = OutContext.getOrCreateSymbol(StringRef(x));
+ MCSymbolELF *Stub =
+ cast<MCSymbolELF>(OutContext.getOrCreateSymbol(StringRef(x)));
MCSymbol *MType =
OutContext.getOrCreateSymbol("__call_stub_fp_" + Twine(Symbol));
@@ -1028,10 +1029,10 @@ void MipsAsmPrinter::EmitFPCallStub(
MCSymbol *Tmp = OutContext.createTempSymbol();
- const MCSymbolRefExpr *E = MCSymbolRefExpr::Create(Stub, OutContext);
- const MCSymbolRefExpr *T = MCSymbolRefExpr::Create(Tmp, OutContext);
- const MCExpr *T_min_E = MCBinaryExpr::CreateSub(T, E, OutContext);
- OutStreamer->EmitELFSize(Stub, T_min_E);
+ const MCSymbolRefExpr *E = MCSymbolRefExpr::create(Stub, OutContext);
+ const MCSymbolRefExpr *T = MCSymbolRefExpr::create(Tmp, OutContext);
+ const MCExpr *T_min_E = MCBinaryExpr::createSub(T, E, OutContext);
+ OutStreamer->emitELFSize(Stub, T_min_E);
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 4faee10..3d020ab 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -82,6 +82,7 @@ class MipsFastISel final : public FastISel {
LLVMContext *Context;
bool fastLowerCall(CallLoweringInfo &CLI) override;
+ bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
bool TargetSupported;
bool UnsupportedFPMode; // To allow fast-isel to proceed and just not handle
@@ -94,6 +95,7 @@ private:
bool selectLoad(const Instruction *I);
bool selectStore(const Instruction *I);
bool selectBranch(const Instruction *I);
+ bool selectSelect(const Instruction *I);
bool selectCmp(const Instruction *I);
bool selectFPExt(const Instruction *I);
bool selectFPTrunc(const Instruction *I);
@@ -102,6 +104,7 @@ private:
bool selectTrunc(const Instruction *I);
bool selectIntExt(const Instruction *I);
bool selectShift(const Instruction *I);
+ bool selectDivRem(const Instruction *I, unsigned ISDOpcode);
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
@@ -140,6 +143,7 @@ private:
unsigned materializeGV(const GlobalValue *GV, MVT VT);
unsigned materializeInt(const Constant *C, MVT VT);
unsigned materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
+ unsigned materializeExternalCallSym(const char *SynName);
MachineInstrBuilder emitInst(unsigned Opc) {
return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
@@ -156,6 +160,12 @@ private:
unsigned MemReg, int64_t MemOffset) {
return emitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset);
+ unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
// for some reason, this default is not generated by tablegen
// so we explicitly generate it here.
@@ -359,6 +369,15 @@ unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) {
return DestReg;
+unsigned MipsFastISel::materializeExternalCallSym(const char *SymName) {
+ const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+ unsigned DestReg = createResultReg(RC);
+ emitInst(Mips::LW, DestReg)
+ .addReg(MFI->getGlobalBaseReg())
+ .addExternalSymbol(SymName, MipsII::MO_GOT);
+ return DestReg;
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) {
@@ -463,15 +482,51 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) {
- const GlobalValue *GV = dyn_cast<GlobalValue>(V);
- if (GV && isa<Function>(GV) && cast<Function>(GV)->isIntrinsic())
- return false;
- if (!GV)
- return false;
+ const User *U = nullptr;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const auto *I = dyn_cast<Instruction>(V)) {
+ // Check if the value is defined in the same basic block. This information
+ // is crucial to know whether or not folding an operand is valid.
+ if (I->getParent() == FuncInfo.MBB->getBasicBlock()) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast:
+ // Look past bitcasts if its operand is in the same BB.
+ return computeCallAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs if its operand is in the same BB.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return computeCallAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints if its operand is in the same BB.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return computeCallAddress(U->getOperand(0), Addr);
+ break;
+ }
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
return true;
+ // If all else fails, try to materialize the value in a register.
+ if (!Addr.getGlobalValue()) {
+ Addr.setReg(getRegForValue(V));
+ return Addr.getReg() != 0;
+ }
return false;
@@ -893,6 +948,50 @@ bool MipsFastISel::selectFPExt(const Instruction *I) {
return true;
+bool MipsFastISel::selectSelect(const Instruction *I) {
+ assert(isa<SelectInst>(I) && "Expected a select instruction.");
+ if (!isTypeSupported(I->getType(), VT))
+ return false;
+ unsigned CondMovOpc;
+ const TargetRegisterClass *RC;
+ if (VT.isInteger() && !VT.isVector() && VT.getSizeInBits() <= 32) {
+ CondMovOpc = Mips::MOVN_I_I;
+ RC = &Mips::GPR32RegClass;
+ } else if (VT == MVT::f32) {
+ CondMovOpc = Mips::MOVN_I_S;
+ RC = &Mips::FGR32RegClass;
+ } else if (VT == MVT::f64) {
+ CondMovOpc = Mips::MOVN_I_D32;
+ RC = &Mips::AFGR64RegClass;
+ } else
+ return false;
+ const SelectInst *SI = cast<SelectInst>(I);
+ const Value *Cond = SI->getCondition();
+ unsigned Src1Reg = getRegForValue(SI->getTrueValue());
+ unsigned Src2Reg = getRegForValue(SI->getFalseValue());
+ unsigned CondReg = getRegForValue(Cond);
+ if (!Src1Reg || !Src2Reg || !CondReg)
+ return false;
+ unsigned ResultReg = createResultReg(RC);
+ unsigned TempReg = createResultReg(RC);
+ if (!ResultReg || !TempReg)
+ return false;
+ emitInst(TargetOpcode::COPY, TempReg).addReg(Src2Reg);
+ emitInst(CondMovOpc, ResultReg)
+ .addReg(Src1Reg).addReg(CondReg).addReg(TempReg);
+ updateValueMap(I, ResultReg);
+ return true;
// Attempt to fast-select a floating-point truncate instruction.
bool MipsFastISel::selectFPTrunc(const Instruction *I) {
if (UnsupportedFPMode)
@@ -1135,7 +1234,7 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
bool IsTailCall = CLI.IsTailCall;
bool IsVarArg = CLI.IsVarArg;
const Value *Callee = CLI.Callee;
- // const char *SymName = CLI.SymName;
+ const char *SymName = CLI.SymName;
// Allow SelectionDAG isel to handle tail calls.
if (IsTailCall)
@@ -1182,8 +1281,15 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (!processCallArgs(CLI, OutVTs, NumBytes))
return false;
+ if (!Addr.getGlobalValue())
+ return false;
// Issue the call.
- unsigned DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32);
+ unsigned DestAddress;
+ if (SymName)
+ DestAddress = materializeExternalCallSym(SymName);
+ else
+ DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32);
emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress);
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::JALR),
@@ -1203,6 +1309,98 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
return finishCall(CLI, RetVT, NumBytes);
+bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::bswap: {
+ Type *RetTy = II->getCalledFunction()->getReturnType();
+ if (!isTypeSupported(RetTy, VT))
+ return false;
+ unsigned SrcReg = getRegForValue(II->getOperand(0));
+ if (SrcReg == 0)
+ return false;
+ unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
+ if (DestReg == 0)
+ return false;
+ if (VT == MVT::i16) {
+ if (Subtarget->hasMips32r2()) {
+ emitInst(Mips::WSBH, DestReg).addReg(SrcReg);
+ updateValueMap(II, DestReg);
+ return true;
+ } else {
+ unsigned TempReg[3];
+ for (int i = 0; i < 3; i++) {
+ TempReg[i] = createResultReg(&Mips::GPR32RegClass);
+ if (TempReg[i] == 0)
+ return false;
+ }
+ emitInst(Mips::SLL, TempReg[0]).addReg(SrcReg).addImm(8);
+ emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(8);
+ emitInst(Mips::OR, TempReg[2]).addReg(TempReg[0]).addReg(TempReg[1]);
+ emitInst(Mips::ANDi, DestReg).addReg(TempReg[2]).addImm(0xFFFF);
+ updateValueMap(II, DestReg);
+ return true;
+ }
+ } else if (VT == MVT::i32) {
+ if (Subtarget->hasMips32r2()) {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::WSBH, TempReg).addReg(SrcReg);
+ emitInst(Mips::ROTR, DestReg).addReg(TempReg).addImm(16);
+ updateValueMap(II, DestReg);
+ return true;
+ } else {
+ unsigned TempReg[8];
+ for (int i = 0; i < 8; i++) {
+ TempReg[i] = createResultReg(&Mips::GPR32RegClass);
+ if (TempReg[i] == 0)
+ return false;
+ }
+ emitInst(Mips::SRL, TempReg[0]).addReg(SrcReg).addImm(8);
+ emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(24);
+ emitInst(Mips::ANDi, TempReg[2]).addReg(TempReg[0]).addImm(0xFF00);
+ emitInst(Mips::OR, TempReg[3]).addReg(TempReg[1]).addReg(TempReg[2]);
+ emitInst(Mips::ANDi, TempReg[4]).addReg(SrcReg).addImm(0xFF00);
+ emitInst(Mips::SLL, TempReg[5]).addReg(TempReg[4]).addImm(8);
+ emitInst(Mips::SLL, TempReg[6]).addReg(SrcReg).addImm(24);
+ emitInst(Mips::OR, TempReg[7]).addReg(TempReg[3]).addReg(TempReg[5]);
+ emitInst(Mips::OR, DestReg).addReg(TempReg[6]).addReg(TempReg[7]);
+ updateValueMap(II, DestReg);
+ return true;
+ }
+ }
+ return false;
+ }
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ const auto *MTI = cast<MemTransferInst>(II);
+ // Don't handle volatile.
+ if (MTI->isVolatile())
+ return false;
+ if (!MTI->getLength()->getType()->isIntegerTy(32))
+ return false;
+ const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
+ return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
+ }
+ case Intrinsic::memset: {
+ const MemSetInst *MSI = cast<MemSetInst>(II);
+ // Don't handle volatile.
+ if (MSI->isVolatile())
+ return false;
+ if (!MSI->getLength()->getType()->isIntegerTy(32))
+ return false;
+ return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
+ }
+ }
+ return false;
bool MipsFastISel::selectRet(const Instruction *I) {
const Function &F = *I->getParent()->getParent();
const ReturnInst *Ret = cast<ReturnInst>(I);
@@ -1420,6 +1618,50 @@ unsigned MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
return Success ? DestReg : 0;
+bool MipsFastISel::selectDivRem(const Instruction *I, unsigned ISDOpcode) {
+ EVT DestEVT = TLI.getValueType(I->getType(), true);
+ if (!DestEVT.isSimple())
+ return false;
+ MVT DestVT = DestEVT.getSimpleVT();
+ if (DestVT != MVT::i32)
+ return false;
+ unsigned DivOpc;
+ switch (ISDOpcode) {
+ default:
+ return false;
+ case ISD::SDIV:
+ case ISD::SREM:
+ DivOpc = Mips::SDIV;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ DivOpc = Mips::UDIV;
+ break;
+ }
+ unsigned Src0Reg = getRegForValue(I->getOperand(0));
+ unsigned Src1Reg = getRegForValue(I->getOperand(1));
+ if (!Src0Reg || !Src1Reg)
+ return false;
+ emitInst(DivOpc).addReg(Src0Reg).addReg(Src1Reg);
+ emitInst(Mips::TEQ).addReg(Src1Reg).addReg(Mips::ZERO).addImm(7);
+ unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ if (!ResultReg)
+ return false;
+ unsigned MFOpc = (ISDOpcode == ISD::SREM || ISDOpcode == ISD::UREM)
+ ? Mips::MFHI
+ : Mips::MFLO;
+ emitInst(MFOpc, ResultReg);
+ updateValueMap(I, ResultReg);
+ return true;
bool MipsFastISel::selectShift(const Instruction *I) {
@@ -1505,6 +1747,22 @@ bool MipsFastISel::fastSelectInstruction(const Instruction *I) {
return selectLoad(I);
case Instruction::Store:
return selectStore(I);
+ case Instruction::SDiv:
+ if (!selectBinaryOp(I, ISD::SDIV))
+ return selectDivRem(I, ISD::SDIV);
+ return true;
+ case Instruction::UDiv:
+ if (!selectBinaryOp(I, ISD::UDIV))
+ return selectDivRem(I, ISD::UDIV);
+ return true;
+ case Instruction::SRem:
+ if (!selectBinaryOp(I, ISD::SREM))
+ return selectDivRem(I, ISD::SREM);
+ return true;
+ case Instruction::URem:
+ if (!selectBinaryOp(I, ISD::UREM))
+ return selectDivRem(I, ISD::UREM);
+ return true;
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
@@ -1533,6 +1791,8 @@ bool MipsFastISel::fastSelectInstruction(const Instruction *I) {
case Instruction::ICmp:
case Instruction::FCmp:
return selectCmp(I);
+ case Instruction::Select:
+ return selectSelect(I);
return false;
@@ -1563,6 +1823,33 @@ void MipsFastISel::simplifyAddress(Address &Addr) {
+unsigned MipsFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ // We treat the MUL instruction in a special way because it clobbers
+ // the HI0 & LO0 registers. The TableGen definition of this instruction can
+ // mark these registers only as implicitly defined. As a result, the
+ // register allocator runs out of registers when this instruction is
+ // followed by another instruction that defines the same registers too.
+ // We can fix this by explicitly marking those registers as dead.
+ if (MachineInstOpcode == Mips::MUL) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Mips::HI0, RegState::ImplicitDefine | RegState::Dead)
+ .addReg(Mips::LO0, RegState::ImplicitDefine | RegState::Dead);
+ return ResultReg;
+ }
+ return FastISel::fastEmitInst_rr(MachineInstOpcode, RC, Op0, Op0IsKill, Op1,
+ Op1IsKill);
namespace llvm {
FastISel *Mips::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 826fbaf..a74c8ab 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -90,12 +90,23 @@ const MipsFrameLowering *MipsFrameLowering::create(const MipsSubtarget &ST) {
// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
+// pointer register. This is true if the function has variable sized allocas,
+// if it needs dynamic stack realignment, if frame pointer elimination is
+// disabled, or if the frame address is taken.
bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
+ MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() ||
+ TRI->needsStackRealignment(MF);
+bool MipsFrameLowering::hasBP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ return MFI->hasVarSizedObjects() && TRI->needsStackRealignment(MF);
uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index 0b51830..5eabd58 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -32,6 +32,8 @@ public:
bool hasFP(const MachineFunction &MF) const override;
+ bool hasBP(const MachineFunction &MF) const;
bool isFPCloseToIncomingSP() const override { return false; }
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 6c7f089..67ddcc4 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -3547,7 +3547,8 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
bool MipsTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 6ea14b5..bc9a1ce 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -514,7 +514,8 @@ namespace llvm {
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
diff --git a/lib/Target/Mips/ b/lib/Target/Mips/
index 02ecf32..5f4fcc3 100644
--- a/lib/Target/Mips/
+++ b/lib/Target/Mips/
@@ -226,6 +226,18 @@ class MFC3OP_FM<bits<6> op, bits<5> mfmt>
let Inst{2-0} = sel;
+class MFC2OP_FM<bits<6> op, bits<5> mfmt> : StdArch {
+ bits<5> rt;
+ bits<16> imm16;
+ bits<32> Inst;
+ let Inst{31-26} = op;
+ let Inst{25-21} = mfmt;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
class ADD_FM<bits<6> op, bits<6> funct> : StdArch {
bits<5> rd;
bits<5> rs;
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 9e61180..6b2a44d 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -101,7 +101,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
llvm_unreachable("<unknown operand type>");
- const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx);
+ const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Symbol, Kind, *Ctx);
if (!Offset)
return MCOperand::createExpr(MCSym);
@@ -109,8 +109,8 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
// Assume offset is never negative.
assert(Offset > 0);
- const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx);
- const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
+ const MCConstantExpr *OffsetExpr = MCConstantExpr::create(Offset, *Ctx);
+ const MCBinaryExpr *Add = MCBinaryExpr::createAdd(MCSym, OffsetExpr, *Ctx);
return MCOperand::createExpr(Add);
@@ -155,11 +155,11 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO,
MCOperand MipsMCInstLower::createSub(MachineBasicBlock *BB1,
MachineBasicBlock *BB2,
MCSymbolRefExpr::VariantKind Kind) const {
- const MCSymbolRefExpr *Sym1 = MCSymbolRefExpr::Create(BB1->getSymbol(), *Ctx);
- const MCSymbolRefExpr *Sym2 = MCSymbolRefExpr::Create(BB2->getSymbol(), *Ctx);
- const MCBinaryExpr *Sub = MCBinaryExpr::CreateSub(Sym1, Sym2, *Ctx);
+ const MCSymbolRefExpr *Sym1 = MCSymbolRefExpr::create(BB1->getSymbol(), *Ctx);
+ const MCSymbolRefExpr *Sym2 = MCSymbolRefExpr::create(BB2->getSymbol(), *Ctx);
+ const MCBinaryExpr *Sub = MCBinaryExpr::createSub(Sym1, Sym2, *Ctx);
- return MCOperand::createExpr(MipsMCExpr::Create(Kind, Sub, *Ctx));
+ return MCOperand::createExpr(MipsMCExpr::create(Kind, Sub, *Ctx));
void MipsMCInstLower::
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index f72fb4d..f6647e6 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/Constants.h"
@@ -178,6 +179,15 @@ getReservedRegs(const MachineFunction &MF) const {
else {
+ // Reserve the base register if we need to both realign the stack and
+ // allocate variable-sized objects at runtime. This should test the
+ // same conditions as MipsFrameLowering::hasBP().
+ if (needsStackRealignment(MF) &&
+ MF.getFrameInfo()->hasVarSizedObjects()) {
+ Reserved.set(Mips::S7);
+ Reserved.set(Mips::S7_64);
+ }
@@ -271,6 +281,67 @@ getFrameRegister(const MachineFunction &MF) const {
return TFI->hasFP(MF) ? (IsN64 ? Mips::FP_64 : Mips::FP) :
(IsN64 ? Mips::SP_64 : Mips::SP);
+bool MipsRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
+ unsigned FP = Subtarget.isGP32bit() ? Mips::FP : Mips::FP_64;
+ unsigned BP = Subtarget.isGP32bit() ? Mips::S7 : Mips::S7_64;
+ // Support dynamic stack realignment only for targets with standard encoding.
+ if (!Subtarget.hasStandardEncoding())
+ return false;
+ // We can't perform dynamic stack realignment if we can't reserve the
+ // frame pointer register.
+ if (!MF.getRegInfo().canReserveReg(FP))
+ return false;
+ // We can realign the stack if we know the maximum call frame size and we
+ // don't have variable sized objects.
+ if (Subtarget.getFrameLowering()->hasReservedCallFrame(MF))
+ return true;
+ // We have to reserve the base pointer register in the presence of variable
+ // sized objects.
+ return MF.getRegInfo().canReserveReg(BP);
+bool MipsRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool CanRealign = canRealignStack(MF);
+ // Avoid realigning functions that explicitly do not want to be realigned.
+ // Normally, we should report an error when a function should be dynamically
+ // realigned but also has the attribute no-realign-stack. Unfortunately,
+ // with this attribute, MachineFrameInfo clamps each new object's alignment
+ // to that of the stack's alignment as specified by the ABI. As a result,
+ // the information of whether we have objects with larger alignment
+ // requirement than the stack's alignment is already lost at this point.
+ if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+ return false;
+ const Function *F = MF.getFunction();
+ if (F->hasFnAttribute(Attribute::StackAlignment)) {
+#ifdef DEBUG
+ if (!CanRealign)
+ DEBUG(dbgs() << "It's not possible to realign the stack of the function: "
+ << F->getName() << "\n");
+ return CanRealign;
+ }
+ unsigned StackAlignment = Subtarget.getFrameLowering()->getStackAlignment();
+ if (MFI->getMaxAlignment() > StackAlignment) {
+#ifdef DEBUG
+ if (!CanRealign)
+ DEBUG(dbgs() << "It's not possible to realign the stack of the function: "
+ << F->getName() << "\n");
+ return CanRealign;
+ }
+ return false;
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 76e84bd..ee1f6bc 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -57,6 +57,14 @@ public:
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = nullptr) const;
+ // Stack realignment queries.
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const override;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const override;
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 19efa59..ec7bf31 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -382,6 +382,11 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
unsigned FP = ABI.GetFramePtr();
unsigned ZERO = ABI.GetNullPtr();
unsigned ADDu = ABI.GetPtrAdduOp();
+ unsigned ADDiu = ABI.GetPtrAddiuOp();
+ unsigned AND = ABI.IsN64() ? Mips::AND64 : Mips::AND;
+ const TargetRegisterClass *RC = ABI.ArePtrs64bit() ?
+ &Mips::GPR64RegClass : &Mips::GPR32RegClass;
// First, compute final stack size.
uint64_t StackSize = MFI->getStackSize();
@@ -464,15 +469,12 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
if (MipsFI->callsEhReturn()) {
- const TargetRegisterClass *PtrRC =
- ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
// Insert instructions that spill eh data registers.
for (int I = 0; I < 4; ++I) {
if (!MBB.isLiveIn(ABI.GetEhDataReg(I)))
TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false,
- MipsFI->getEhDataRegFI(I), PtrRC, &RegInfo);
+ MipsFI->getEhDataRegFI(I), RC, &RegInfo);
// Emit .cfi_offset directives for eh data registers.
@@ -497,6 +499,26 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
nullptr, MRI->getDwarfRegNum(FP, true)));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ if (RegInfo.needsStackRealignment(MF)) {
+ // addiu $Reg, $zero, -MaxAlignment
+ // and $sp, $sp, $Reg
+ unsigned VR = MF.getRegInfo().createVirtualRegister(RC);
+ assert(isInt<16>(MFI->getMaxAlignment()) &&
+ "Function's alignment size requirement is not supported.");
+ int MaxAlign = - (signed) MFI->getMaxAlignment();
+ BuildMI(MBB, MBBI, dl, TII.get(ADDiu), VR).addReg(ZERO) .addImm(MaxAlign);
+ BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR);
+ if (hasBP(MF)) {
+ // move $s7, $sp
+ unsigned BP = STI.isABI_N64() ? Mips::S7_64 : Mips::S7;
+ BuildMI(MBB, MBBI, dl, TII.get(ADDu), BP)
+ .addReg(SP)
+ .addReg(ZERO);
+ }
+ }
@@ -606,10 +628,14 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
MipsABIInfo ABI = STI.getABI();
unsigned FP = ABI.GetFramePtr();
+ unsigned BP = ABI.IsN64() ? Mips::S7_64 : Mips::S7;
// Mark $fp as used if function has dedicated frame pointer.
if (hasFP(MF))
+ // Mark $s7 as used if function has dedicated base pointer.
+ if (hasBP(MF))
+ MRI.setPhysRegUsed(BP);
// Create spill slots for eh data registers if function calls eh_return.
if (MipsFI->callsEhReturn())
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index 8c74a98..132c3a1 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -110,8 +110,11 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
MachineFunction &MF = *MI.getParent()->getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
MipsABIInfo ABI =
static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI();
+ const MipsRegisterInfo *RegInfo =
+ static_cast<const MipsRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
int MinCSFI = 0;
@@ -135,7 +138,14 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI)
FrameReg = ABI.GetStackPtr();
- else
+ else if (RegInfo->needsStackRealignment(MF)) {
+ if (MFI->hasVarSizedObjects() && !MFI->isFixedObjectIndex(FrameIndex))
+ FrameReg = ABI.GetBasePtr();
+ else if (MFI->isFixedObjectIndex(FrameIndex))
+ FrameReg = getFrameRegister(MF);
+ else
+ FrameReg = ABI.GetStackPtr();
+ } else
FrameReg = getFrameRegister(MF);
// Calculate final offset.
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index 22b0c6c..fed0600 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -72,6 +72,8 @@ public:
virtual void emitDirectiveSetNoDsp();
virtual void emitDirectiveSetPop();
virtual void emitDirectiveSetPush();
+ virtual void emitDirectiveSetSoftFloat();
+ virtual void emitDirectiveSetHardFloat();
// PIC support
virtual void emitDirectiveCpLoad(unsigned RegNo);
@@ -188,6 +190,8 @@ public:
void emitDirectiveSetNoDsp() override;
void emitDirectiveSetPop() override;
void emitDirectiveSetPush() override;
+ void emitDirectiveSetSoftFloat() override;
+ void emitDirectiveSetHardFloat() override;
// PIC support
void emitDirectiveCpLoad(unsigned RegNo) override;
diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index 3615c14..6a65943 100644
--- a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -23,9 +23,9 @@ extern "C" void LLVMInitializeMipsTargetInfo() {
/*HasJIT=*/true> Y(TheMipselTarget, "mipsel", "Mipsel");
- /*HasJIT=*/false> A(TheMips64Target, "mips64", "Mips64 [experimental]");
+ /*HasJIT=*/true> A(TheMips64Target, "mips64", "Mips64 [experimental]");
- /*HasJIT=*/false> B(TheMips64elTarget,
+ /*HasJIT=*/true> B(TheMips64elTarget,
"mips64el", "Mips64el [experimental]");
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index cdd2f1f..d48a7a9 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -20,7 +20,7 @@ set(NVPTXCodeGen_sources
- NVPTXLowerStructArgs.cpp
+ NVPTXLowerKernelArgs.cpp
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index ac92df9..4594c22 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -85,7 +85,7 @@ void NVPTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << markup("<imm:") << formatImm(Op.getImm()) << markup(">");
} else {
assert(Op.isExpr() && "Unknown operand kind in printOperand");
- O << *Op.getExpr();
+ Op.getExpr()->print(O, &MAI);
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index b9df3d1..ef36c13 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -25,8 +25,7 @@ static cl::opt<bool> CompileForDebugging("debug-compile",
void NVPTXMCAsmInfo::anchor() {}
-NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) {
- Triple TheTriple(TT);
+NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple) {
if (TheTriple.getArch() == Triple::nvptx64) {
PointerSize = CalleeSaveStackSlotSize = 8;
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index c324286..b432e06 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -18,12 +18,12 @@
namespace llvm {
class Target;
-class StringRef;
+class Triple;
class NVPTXMCAsmInfo : public MCAsmInfo {
virtual void anchor();
- explicit NVPTXMCAsmInfo(StringRef TT);
+ explicit NVPTXMCAsmInfo(const Triple &TheTriple);
} // namespace llvm
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 382525d..477b0ba 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -69,7 +69,7 @@ ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
-FunctionPass *createNVPTXLowerStructArgsPass();
+FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM);
bool isImageOrSamplerVal(const Value *, const Module *);
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 3bbea40..298b992 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -266,7 +266,7 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = MCOperand::createImm(MO.getImm());
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
MO.getMBB()->getSymbol(), OutContext));
case MachineOperand::MO_ExternalSymbol:
@@ -283,11 +283,11 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
default: report_fatal_error("Unsupported FP type"); break;
case Type::FloatTyID:
MCOp = MCOperand::createExpr(
- NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext));
+ NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext));
case Type::DoubleTyID:
MCOp = MCOperand::createExpr(
- NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext));
+ NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext));
@@ -334,7 +334,7 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
const MCExpr *Expr;
- Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
return MCOperand::createExpr(Expr);
@@ -418,9 +418,8 @@ void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
const MachineBasicBlock &MBB) const {
MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
- // TODO: isLoopHeader() should take "const MachineBasicBlock *".
// We insert .pragma "nounroll" only to the loop header.
- if (!LI.isLoopHeader(const_cast<MachineBasicBlock *>(&MBB)))
+ if (!LI.isLoopHeader(&MBB))
return false;
// llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
@@ -468,7 +467,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
printReturnValStr(*MF, O);
- O << *CurrentFnSym;
+ CurrentFnSym->print(O, MAI);
emitFunctionParamList(*MF, O);
@@ -625,7 +624,8 @@ void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
O << ".func ";
printReturnValStr(F, O);
- O << *getSymbol(F) << "\n";
+ getSymbol(F)->print(O, MAI);
+ O << "\n";
emitFunctionParamList(F, O);
O << ";\n";
@@ -1172,7 +1172,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
O << getPTXFundamentalTypeStr(ETy, false);
O << " ";
- O << *getSymbol(GVar);
+ getSymbol(GVar)->print(O, MAI);
// Ptx allows variable initilization only for constant and global state
// spaces.
@@ -1189,11 +1189,9 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
// The frontend adds zero-initializer to variables that don't have an
// initial value, so skip warning for this case.
if (!GVar->getInitializer()->isNullValue()) {
- std::string warnMsg =
- ("initial value of '" + GVar->getName() +
- "' is not allowed in addrspace(" +
- Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str();
- report_fatal_error(warnMsg.c_str());
+ report_fatal_error("initial value of '" + GVar->getName() +
+ "' is not allowed in addrspace(" +
+ Twine(PTy->getAddressSpace()) + ")");
@@ -1220,15 +1218,21 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
bufferAggregateConstant(Initializer, &aggBuffer);
if (aggBuffer.numSymbols) {
if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
- O << " .u64 " << *getSymbol(GVar) << "[";
+ O << " .u64 ";
+ getSymbol(GVar)->print(O, MAI);
+ O << "[";
O << ElementSize / 8;
} else {
- O << " .u32 " << *getSymbol(GVar) << "[";
+ O << " .u32 ";
+ getSymbol(GVar)->print(O, MAI);
+ O << "[";
O << ElementSize / 4;
O << "]";
} else {
- O << " .b8 " << *getSymbol(GVar) << "[";
+ O << " .b8 ";
+ getSymbol(GVar)->print(O, MAI);
+ O << "[";
O << ElementSize;
O << "]";
@@ -1236,7 +1240,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
O << "}";
} else {
- O << " .b8 " << *getSymbol(GVar);
+ O << " .b8 ";
+ getSymbol(GVar)->print(O, MAI);
if (ElementSize) {
O << "[";
O << ElementSize;
@@ -1244,7 +1249,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
} else {
- O << " .b8 " << *getSymbol(GVar);
+ O << " .b8 ";
+ getSymbol(GVar)->print(O, MAI);
if (ElementSize) {
O << "[";
O << ElementSize;
@@ -1351,7 +1357,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
O << " .";
O << getPTXFundamentalTypeStr(ETy);
O << " ";
- O << *getSymbol(GVar);
+ getSymbol(GVar)->print(O, MAI);
@@ -1366,9 +1372,11 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
case Type::ArrayTyID:
case Type::VectorTyID:
ElementSize = TD->getTypeStoreSize(ETy);
- O << " .b8 " << *getSymbol(GVar) << "[";
+ O << " .b8 ";
+ getSymbol(GVar)->print(O, MAI);
+ O << "[";
if (ElementSize) {
- O << itostr(ElementSize);
+ O << ElementSize;
O << "]";
@@ -1408,11 +1416,13 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
int paramIndex, raw_ostream &O) {
- O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
+ getSymbol(I->getParent())->print(O, MAI);
+ O << "_param_" << paramIndex;
void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
- O << *CurrentFnSym << "_param_" << paramIndex;
+ CurrentFnSym->print(O, MAI);
+ O << "_param_" << paramIndex;
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
@@ -1446,21 +1456,24 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << "\t.param .u64 .ptr .surfref ";
O << "\t.param .surfref ";
- O << *CurrentFnSym << "_param_" << paramIndex;
+ CurrentFnSym->print(O, MAI);
+ O << "_param_" << paramIndex;
else { // Default image is read_only
if (nvptxSubtarget->hasImageHandles())
O << "\t.param .u64 .ptr .texref ";
O << "\t.param .texref ";
- O << *CurrentFnSym << "_param_" << paramIndex;
+ CurrentFnSym->print(O, MAI);
+ O << "_param_" << paramIndex;
} else {
if (nvptxSubtarget->hasImageHandles())
O << "\t.param .u64 .ptr .samplerref ";
O << "\t.param .samplerref ";
- O << *CurrentFnSym << "_param_" << paramIndex;
+ CurrentFnSym->print(O, MAI);
+ O << "_param_" << paramIndex;
@@ -1716,10 +1729,10 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
O << "generic(";
- O << *getSymbol(GVar);
+ getSymbol(GVar)->print(O, MAI);
O << ")";
} else {
- O << *getSymbol(GVar);
+ getSymbol(GVar)->print(O, MAI);
@@ -1733,20 +1746,44 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
O << "generic(";
- O << *getSymbol(GVar);
+ getSymbol(GVar)->print(O, MAI);
O << ")";
} else {
- O << *getSymbol(GVar);
+ getSymbol(GVar)->print(O, MAI);
} else {
- O << *lowerConstant(CPV);
+ lowerConstant(CPV)->print(O, MAI);
llvm_unreachable("Not scalar type found in printScalarConstant()");
+// These utility functions ensure we get the little-endian sequence of bytes
+// for a given type, even when the host is a big-endian machine.
+template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) {
+ int64_t vp = (int64_t)val;
+ for (unsigned i = 0; i < sizeof(T); ++i) {
+ p[i] = (unsigned char)vp;
+ vp >>= 8;
+ }
+static void ConvertFloatToBytes(unsigned char *p, float val) {
+ int32_t *vp = (int32_t *)&val;
+ for (unsigned i = 0; i < sizeof(int32_t); ++i) {
+ p[i] = (unsigned char)*vp;
+ *vp >>= 8;
+ }
+static void ConvertDoubleToBytes(unsigned char *p, double val) {
+ int64_t *vp = (int64_t *)&val;
+ for (unsigned i = 0; i < sizeof(int64_t); ++i) {
+ p[i] = (unsigned char)*vp;
+ *vp >>= 8;
+ }
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
@@ -1760,30 +1797,30 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
- unsigned char *ptr;
+ unsigned char ptr[8];
switch (CPV->getType()->getTypeID()) {
case Type::IntegerTyID: {
const Type *ETy = CPV->getType();
if (ETy == Type::getInt8Ty(CPV->getContext())) {
unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
- ptr = &c;
+ ConvertIntToBytes<>(ptr, c);
aggBuffer->addBytes(ptr, 1, Bytes);
} else if (ETy == Type::getInt16Ty(CPV->getContext())) {
short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
- ptr = (unsigned char *)&int16;
+ ConvertIntToBytes<>(ptr, int16);
aggBuffer->addBytes(ptr, 2, Bytes);
} else if (ETy == Type::getInt32Ty(CPV->getContext())) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
int int32 = (int)(constInt->getZExtValue());
- ptr = (unsigned char *)&int32;
+ ConvertIntToBytes<>(ptr, int32);
aggBuffer->addBytes(ptr, 4, Bytes);
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstantExpression(Cexpr, *TD))) {
int int32 = (int)(constInt->getZExtValue());
- ptr = (unsigned char *)&int32;
+ ConvertIntToBytes<>(ptr, int32);
aggBuffer->addBytes(ptr, 4, Bytes);
@@ -1798,14 +1835,14 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
} else if (ETy == Type::getInt64Ty(CPV->getContext())) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
long long int64 = (long long)(constInt->getZExtValue());
- ptr = (unsigned char *)&int64;
+ ConvertIntToBytes<>(ptr, int64);
aggBuffer->addBytes(ptr, 8, Bytes);
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstantExpression(Cexpr, *TD))) {
long long int64 = (long long)(constInt->getZExtValue());
- ptr = (unsigned char *)&int64;
+ ConvertIntToBytes<>(ptr, int64);
aggBuffer->addBytes(ptr, 8, Bytes);
@@ -1827,11 +1864,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
const Type *Ty = CFP->getType();
if (Ty == Type::getFloatTy(CPV->getContext())) {
float float32 = (float) CFP->getValueAPF().convertToFloat();
- ptr = (unsigned char *)&float32;
+ ConvertFloatToBytes(ptr, float32);
aggBuffer->addBytes(ptr, 4, Bytes);
} else if (Ty == Type::getDoubleTy(CPV->getContext())) {
double float64 = CFP->getValueAPF().convertToDouble();
- ptr = (unsigned char *)&float64;
+ ConvertDoubleToBytes(ptr, float64);
aggBuffer->addBytes(ptr, 8, Bytes);
} else {
llvm_unreachable("unsupported fp const type");
@@ -1993,16 +2030,16 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
MCContext &Ctx = OutContext;
if (CV->isNullValue() || isa<UndefValue>(CV))
- return MCConstantExpr::Create(0, Ctx);
+ return MCConstantExpr::create(0, Ctx);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
- return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+ return MCConstantExpr::create(CI->getZExtValue(), Ctx);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
const MCSymbolRefExpr *Expr =
- MCSymbolRefExpr::Create(getSymbol(GV), Ctx);
+ MCSymbolRefExpr::create(getSymbol(GV), Ctx);
if (ProcessingGeneric) {
- return NVPTXGenericMCSymbolRefExpr::Create(Expr, Ctx);
+ return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
} else {
return Expr;
@@ -2059,7 +2096,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
return Base;
int64_t Offset = OffsetAI.getSExtValue();
- return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+ return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
@@ -2102,8 +2139,8 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
// the high bits so we are sure to get a proper truncation if the input is
// a constant expr.
unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
- const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
- return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+ const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
+ return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
// The MC library also has a right-shift operator, but it isn't consistently
@@ -2113,7 +2150,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
switch (CE->getOpcode()) {
default: llvm_unreachable("Unknown binary operator constant cast expr");
- case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
@@ -2123,7 +2160,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
switch (Expr.getKind()) {
case MCExpr::Target:
- return cast<MCTargetExpr>(&Expr)->PrintImpl(OS);
+ return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
case MCExpr::Constant:
OS << cast<MCConstantExpr>(Expr).getValue();
@@ -2131,7 +2168,7 @@ void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
case MCExpr::SymbolRef: {
const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
const MCSymbol &Sym = SRE.getSymbol();
- OS << Sym;
+ Sym.print(OS, MAI);
@@ -2256,11 +2293,11 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
case MachineOperand::MO_GlobalAddress:
- O << *getSymbol(MO.getGlobal());
+ getSymbol(MO.getGlobal())->print(O, MAI);
case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
+ MO.getMBB()->getSymbol()->print(O, MAI);
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 301c686..f6f7685 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -165,10 +165,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
O << "generic(";
- O << *Name;
+ Name->print(O, AP.MAI);
O << ")";
} else {
- O << *Name;
+ Name->print(O, AP.MAI);
} else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
const MCExpr *Expr =
diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index ae63cae..cfff001 100644
--- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -10,34 +10,54 @@
// When a load/store accesses the generic address space, checks whether the
// address is casted from a non-generic address space. If so, remove this
// addrspacecast because accessing non-generic address spaces is typically
-// faster. Besides seeking addrspacecasts, this optimization also traces into
-// the base pointer of a GEP.
+// faster. Besides removing addrspacecasts directly used by loads/stores, this
+// optimization also recursively traces into a GEP's pointer operand and a
+// bitcast's source to find more eliminable addrspacecasts.
// For instance, the code below loads a float from an array allocated in
// addrspace(3).
-// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
-// %1 = gep [10 x float]* %0, i64 0, i64 %i
-// %2 = load float* %1 ; emits ld.f32
+// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
+// %1 = gep [10 x float]* %0, i64 0, i64 %i
+// %2 = bitcast float* %1 to i32*
+// %3 = load i32* %2 ; emits ld.u32
-// First, function hoistAddrSpaceCastFromGEP reorders the addrspacecast
-// and the GEP to expose more optimization opportunities to function
+// First, function hoistAddrSpaceCastFrom reorders the addrspacecast, the GEP,
+// and the bitcast to expose more optimization opportunities to function
// optimizeMemoryInst. The intermediate code looks like:
-// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
-// %1 = addrspacecast float addrspace(3)* %0 to float*
-// %2 = load float* %1 ; still emits ld.f32, but will be optimized shortly
+// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
+// %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
+// %2 = addrspacecast i32 addrspace(3)* %1 to i32*
+// %3 = load i32* %2 ; still emits ld.u32, but will be optimized shortly
// Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed
// generic pointers, and folds the load and the addrspacecast into a load from
// the original address space. The final code looks like:
-// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
-// %2 = load float addrspace(3)* %0 ; emits ld.shared.f32
+// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
+// %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
+// %3 = load i32 addrspace(3)* %1 ; emits ld.shared.u32
// This pass may remove an addrspacecast in a different BB. Therefore, we
// implement it as a FunctionPass.
+// TODO:
+// The current implementation doesn't handle PHINodes. Eliminating
+// addrspacecasts used by PHINodes is trickier because PHINodes can introduce
+// loops in data flow. For example,
+// %generic.input = addrspacecast float addrspace(3)* %input to float*
+// loop:
+// %y = phi [ %generic.input, %y2 ]
+// %y2 = getelementptr %y, 1
+// %v = load %y2
+// br ..., label %loop, ...
+// Marking %y2 shared depends on marking %y shared, but %y also data-flow
+// depends on %y2. We probably need an iterative fix-point algorithm to handle
+// this case.
#include "NVPTX.h"
@@ -62,17 +82,31 @@ class NVPTXFavorNonGenericAddrSpaces : public FunctionPass {
static char ID;
NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {}
bool runOnFunction(Function &F) override;
/// Optimizes load/store instructions. Idx is the index of the pointer operand
/// (0 for load, and 1 for store). Returns true if it changes anything.
bool optimizeMemoryInstruction(Instruction *I, unsigned Idx);
+ /// Recursively traces into a GEP's pointer operand or a bitcast's source to
+ /// find an eliminable addrspacecast, and hoists that addrspacecast to the
+ /// outermost level. For example, this function transforms
+ /// bitcast(gep(gep(addrspacecast(X))))
+ /// to
+ /// addrspacecast(bitcast(gep(gep(X)))).
+ ///
+ /// This reordering exposes to optimizeMemoryInstruction more
+ /// optimization opportunities on loads and stores.
+ ///
+ /// Returns true if this function successfully hoists an eliminable
+ /// addrspacecast or V is already such an addrspacecast.
/// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X,
- /// indices)". This reordering exposes to optimizeMemoryInstruction more
- /// optimization opportunities on loads and stores. Returns true if it changes
- /// the program.
- bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP);
+ /// indices)".
+ bool hoistAddrSpaceCastFrom(Value *V, int Depth = 0);
+ /// Helper function for GEPs.
+ bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth);
+ /// Helper function for bitcasts.
+ bool hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth);
@@ -85,11 +119,12 @@ INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic",
"Remove unnecessary non-generic-to-generic addrspacecasts",
false, false)
-// Decides whether removing Cast is valid and beneficial. Cast can be an
-// instruction or a constant expression.
-static bool IsEliminableAddrSpaceCast(Operator *Cast) {
- // Returns false if not even an addrspacecast.
- if (Cast->getOpcode() != Instruction::AddrSpaceCast)
+// Decides whether V is an addrspacecast and shortcutting V in load/store is
+// valid and beneficial.
+static bool isEliminableAddrSpaceCast(Value *V) {
+ // Returns false if V is not even an addrspacecast.
+ Operator *Cast = dyn_cast<Operator>(V);
+ if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast)
return false;
Value *Src = Cast->getOperand(0);
@@ -108,67 +143,119 @@ static bool IsEliminableAddrSpaceCast(Operator *Cast) {
DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC);
-bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
- GEPOperator *GEP) {
- Operator *Cast = dyn_cast<Operator>(GEP->getPointerOperand());
- if (!Cast)
+bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(GEPOperator *GEP,
+ int Depth) {
+ if (!hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1))
return false;
- if (!IsEliminableAddrSpaceCast(Cast))
- return false;
+ // That hoistAddrSpaceCastFrom succeeds implies GEP's pointer operand is now
+ // an eliminable addrspacecast.
+ assert(isEliminableAddrSpaceCast(GEP->getPointerOperand()));
+ Operator *Cast = cast<Operator>(GEP->getPointerOperand());
SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) {
- // %1 = gep (addrspacecast X), indices
+ // GEP = gep (addrspacecast X), indices
// =>
- // %0 = gep X, indices
- // %1 = addrspacecast %0
- GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(
+ // NewGEP = gep X, indices
+ // NewASC = addrspacecast NewGEP
+ GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
GEP->getSourceElementType(), Cast->getOperand(0), Indices,
- GEP->getName(), GEPI);
- NewGEPI->setIsInBounds(GEP->isInBounds());
- GEP->replaceAllUsesWith(
- new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI));
+ "", GEPI);
+ NewGEP->setIsInBounds(GEP->isInBounds());
+ Value *NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI);
+ NewASC->takeName(GEP);
+ GEP->replaceAllUsesWith(NewASC);
} else {
// GEP is a constant expression.
- Constant *NewGEPCE = ConstantExpr::getGetElementPtr(
+ Constant *NewGEP = ConstantExpr::getGetElementPtr(
GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
Indices, GEP->isInBounds());
- ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType()));
+ ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType()));
return true;
-bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
- unsigned Idx) {
- // If the pointer operand is a GEP, hoist the addrspacecast if any from the
- // GEP to expose more optimization opportunites.
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(MI->getOperand(Idx))) {
- hoistAddrSpaceCastFromGEP(GEP);
- }
+bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast(
+ BitCastOperator *BC, int Depth) {
+ if (!hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1))
+ return false;
- // load/store (addrspacecast X) => load/store X if shortcutting the
- // addrspacecast is valid and can improve performance.
- //
- // e.g.,
- // %1 = addrspacecast float addrspace(3)* %0 to float*
- // %2 = load float* %1
- // ->
- // %2 = load float addrspace(3)* %0
- //
- // Note: the addrspacecast can also be a constant expression.
- if (Operator *Cast = dyn_cast<Operator>(MI->getOperand(Idx))) {
- if (IsEliminableAddrSpaceCast(Cast)) {
- MI->setOperand(Idx, Cast->getOperand(0));
- return true;
- }
+ // That hoistAddrSpaceCastFrom succeeds implies BC's source operand is now
+ // an eliminable addrspacecast.
+ assert(isEliminableAddrSpaceCast(BC->getOperand(0)));
+ Operator *Cast = cast<Operator>(BC->getOperand(0));
+ // Cast = addrspacecast Src
+ // BC = bitcast Cast
+ // =>
+ // Cast' = bitcast Src
+ // BC' = addrspacecast Cast'
+ Value *Src = Cast->getOperand(0);
+ Type *TypeOfNewCast =
+ PointerType::get(BC->getType()->getPointerElementType(),
+ Src->getType()->getPointerAddressSpace());
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) {
+ Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI);
+ Value *NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI);
+ NewBC->takeName(BC);
+ BC->replaceAllUsesWith(NewBC);
+ } else {
+ // BC is a constant expression.
+ Constant *NewCast =
+ ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast);
+ Constant *NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType());
+ BC->replaceAllUsesWith(NewBC);
+ return true;
+bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V,
+ int Depth) {
+ // Returns true if V is already an eliminable addrspacecast.
+ if (isEliminableAddrSpaceCast(V))
+ return true;
+ // Limit the depth to prevent this recursive function from running too long.
+ const int MaxDepth = 20;
+ if (Depth >= MaxDepth)
+ return false;
+ // If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer
+ // operand. This enables optimizeMemoryInstruction to shortcut addrspacecasts
+ // that are not directly used by the load/store.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+ return hoistAddrSpaceCastFromGEP(GEP, Depth);
+ if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V))
+ return hoistAddrSpaceCastFromBitCast(BC, Depth);
return false;
+bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
+ unsigned Idx) {
+ if (hoistAddrSpaceCastFrom(MI->getOperand(Idx))) {
+ // load/store (addrspacecast X) => load/store X if shortcutting the
+ // addrspacecast is valid and can improve performance.
+ //
+ // e.g.,
+ // %1 = addrspacecast float addrspace(3)* %0 to float*
+ // %2 = load float* %1
+ // ->
+ // %2 = load float addrspace(3)* %0
+ //
+ // Note: the addrspacecast can also be a constant expression.
+ assert(isEliminableAddrSpaceCast(MI->getOperand(Idx)));
+ Operator *ASC = dyn_cast<Operator>(MI->getOperand(Idx));
+ MI->setOperand(Idx, ASC->getOperand(0));
+ return true;
+ }
+ return false;
bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {
if (DisableFavorNonGeneric)
return false;
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index fa38a68..232a611 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -613,6 +613,10 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
Opc =
TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
+ Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
+ : NVPTX::nvvm_ptr_gen_to_param;
+ break;
return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 805847a..b5af72a 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3725,7 +3725,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp)
bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
// AddrMode - This represents an addressing mode of:
// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 5142ae3..ed94775 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -456,7 +456,8 @@ public:
/// Used to guide target specific optimizations, like loop strength
/// reduction (LoopStrengthReduce.cpp) and memory optimization for
/// address mode (CodeGenPrepare.cpp)
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned getFunctionAlignment(const Function *F) const;
@@ -497,12 +498,6 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned getInlineAsmMemConstraint(
- const std::string &ConstraintCode) const override {
- // FIXME: Map different constraints differently.
- return InlineAsm::Constraint_m;
- }
const NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
diff --git a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
new file mode 100644
index 0000000..24dcb12
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
@@ -0,0 +1,170 @@
+//===-- NVPTXLowerKernelArgs.cpp - Lower kernel arguments -----------------===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// Pointer arguments to kernel functions need to be lowered specially.
+// 1. Copy byval struct args to local memory. This is a preparation for handling
+// cases like
+// kernel void foo(struct A arg, ...)
+// {
+// struct A *p = &arg;
+// ...
+// ... = p->field1 ... (there is no generic address for .param)
+// p->field2 = ... (there is no write access to .param)
+// }
+// 2. Convert non-byval pointer arguments of CUDA kernels to pointers in the
+// global address space. This allows later optimizations to emit
+// ld.global.*/st.global.* for accessing these pointer arguments. For
+// example,
+// define void @foo(float* %input) {
+// %v = load float, float* %input, align 4
+// ...
+// }
+// becomes
+// define void @foo(float* %input) {
+// %input2 = addrspacecast float* %input to float addrspace(1)*
+// %input3 = addrspacecast float addrspace(1)* %input2 to float*
+// %v = load float, float* %input3, align 4
+// ...
+// }
+// Later, NVPTXFavorNonGenericAddrSpaces will optimize it to
+// define void @foo(float* %input) {
+// %input2 = addrspacecast float* %input to float addrspace(1)*
+// %v = load float, float addrspace(1)* %input2, align 4
+// ...
+// }
+// TODO: merge this pass with NVPTXFavorNonGenericAddrSpaces so that other passes
+// don't cancel the addrspacecast pair this pass emits.
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+namespace llvm {
+void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
+namespace {
+class NVPTXLowerKernelArgs : public FunctionPass {
+ bool runOnFunction(Function &F) override;
+ // handle byval parameters
+ void handleByValParam(Argument *);
+ // handle non-byval pointer parameters
+ void handlePointerParam(Argument *);
+ static char ID; // Pass identification, replacement for typeid
+ NVPTXLowerKernelArgs(const NVPTXTargetMachine *TM = nullptr)
+ : FunctionPass(ID), TM(TM) {}
+ const char *getPassName() const override {
+ return "Lower pointer arguments of CUDA kernels";
+ }
+ const NVPTXTargetMachine *TM;
+} // namespace
+char NVPTXLowerKernelArgs::ID = 1;
+INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args",
+ "Lower kernel arguments (NVPTX)", false, false)
+// =============================================================================
+// If the function had a byval struct ptr arg, say foo(%struct.x *byval %d),
+// then add the following instructions to the first basic block:
+// %temp = alloca %struct.x, align 8
+// %tempd = addrspacecast %struct.x* %d to %struct.x addrspace(101)*
+// %tv = load %struct.x addrspace(101)* %tempd
+// store %struct.x %tv, %struct.x* %temp, align 8
+// The above code allocates some space in the stack and copies the incoming
+// struct from param space to local space.
+// Then replace all occurrences of %d by %temp.
+// =============================================================================
+void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
+ Function *Func = Arg->getParent();
+ Instruction *FirstInst = &(Func->getEntryBlock().front());
+ PointerType *PType = dyn_cast<PointerType>(Arg->getType());
+ assert(PType && "Expecting pointer type in handleByValParam");
+ Type *StructType = PType->getElementType();
+ AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);
+ // Set the alignment to alignment of the byval parameter. This is because,
+ // later load/stores assume that alignment, and we are going to replace
+ // the use of the byval parameter with this alloca instruction.
+ AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
+ Arg->replaceAllUsesWith(AllocA);
+ Value *ArgInParam = new AddrSpaceCastInst(
+ Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(),
+ FirstInst);
+ LoadInst *LI = new LoadInst(ArgInParam, Arg->getName(), FirstInst);
+ new StoreInst(LI, AllocA, FirstInst);
+void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) {
+ assert(!Arg->hasByValAttr() &&
+ "byval params should be handled by handleByValParam");
+ Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin();
+ Instruction *ArgInGlobal = new AddrSpaceCastInst(
+ Arg, PointerType::get(Arg->getType()->getPointerElementType(),
+ Arg->getName(), FirstInst);
+ Value *ArgInGeneric = new AddrSpaceCastInst(ArgInGlobal, Arg->getType(),
+ Arg->getName(), FirstInst);
+ // Replace with ArgInGeneric all uses of Arg except ArgInGlobal.
+ Arg->replaceAllUsesWith(ArgInGeneric);
+ ArgInGlobal->setOperand(0, Arg);
+// =============================================================================
+// Main function for this pass.
+// =============================================================================
+bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
+ // Skip non-kernels. See the comments at the top of this file.
+ if (!isKernelFunction(F))
+ return false;
+ for (Argument &Arg : F.args()) {
+ if (Arg.getType()->isPointerTy()) {
+ if (Arg.hasByValAttr())
+ handleByValParam(&Arg);
+ else if (TM && TM->getDrvInterface() == NVPTX::CUDA)
+ handlePointerParam(&Arg);
+ }
+ }
+ return true;
+FunctionPass *
+llvm::createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM) {
+ return new NVPTXLowerKernelArgs(TM);
diff --git a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
deleted file mode 100644
index 68dfbb7..0000000
--- a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-//===-- NVPTXLowerStructArgs.cpp - Copy struct args to local memory =====--===//
-// The LLVM Compiler Infrastructure
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// Copy struct args to local memory. This is needed for kernel functions only.
-// This is a preparation for handling cases like
-// kernel void foo(struct A arg, ...)
-// {
-// struct A *p = &arg;
-// ...
-// ... = p->filed1 ... (this is no generic address for .param)
-// p->filed2 = ... (this is no write access to .param)
-// }
-#include "NVPTX.h"
-#include "NVPTXUtilities.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Pass.h"
-using namespace llvm;
-namespace llvm {
-void initializeNVPTXLowerStructArgsPass(PassRegistry &);
-namespace {
-class NVPTXLowerStructArgs : public FunctionPass {
- bool runOnFunction(Function &F) override;
- void handleStructPtrArgs(Function &);
- void handleParam(Argument *);
- static char ID; // Pass identification, replacement for typeid
- NVPTXLowerStructArgs() : FunctionPass(ID) {}
- const char *getPassName() const override {
- return "Copy structure (byval *) arguments to stack";
- }
-} // namespace
-char NVPTXLowerStructArgs::ID = 1;
-INITIALIZE_PASS(NVPTXLowerStructArgs, "nvptx-lower-struct-args",
- "Lower structure arguments (NVPTX)", false, false)
-void NVPTXLowerStructArgs::handleParam(Argument *Arg) {
- Function *Func = Arg->getParent();
- Instruction *FirstInst = &(Func->getEntryBlock().front());
- PointerType *PType = dyn_cast<PointerType>(Arg->getType());
- assert(PType && "Expecting pointer type in handleParam");
- Type *StructType = PType->getElementType();
- AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);
- /* Set the alignment to alignment of the byval parameter. This is because,
- * later load/stores assume that alignment, and we are going to replace
- * the use of the byval parameter with this alloca instruction.
- */
- AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
- Arg->replaceAllUsesWith(AllocA);
- // Get the intrinsic
- Type *CvtTypes[] = {
- Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_PARAM),
- Type::getInt8PtrTy(Func->getParent()->getContext(),
- Function *CvtFunc = Intrinsic::getDeclaration(
- Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param, CvtTypes);
- Value *BitcastArgs[] = {
- new BitCastInst(Arg, Type::getInt8PtrTy(Func->getParent()->getContext(),
- Arg->getName(), FirstInst)};
- CallInst *CallCVT =
- CallInst::Create(CvtFunc, BitcastArgs, "cvt_to_param", FirstInst);
- BitCastInst *BitCast = new BitCastInst(
- CallCVT, PointerType::get(StructType, ADDRESS_SPACE_PARAM),
- Arg->getName(), FirstInst);
- LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst);
- new StoreInst(LI, AllocA, FirstInst);
-// =============================================================================
-// If the function had a struct ptr arg, say foo(%struct.x *byval %d), then
-// add the following instructions to the first basic block :
-// %temp = alloca %struct.x, align 8
-// %tt1 = bitcast %struct.x * %d to i8 *
-// %tt2 = %tt2
-// %tempd = bitcast i8 addrspace(101) * to %struct.x addrspace(101) *
-// %tv = load %struct.x addrspace(101) * %tempd
-// store %struct.x %tv, %struct.x * %temp, align 8
-// The above code allocates some space in the stack and copies the incoming
-// struct from param space to local space.
-// Then replace all occurences of %d by %temp.
-// =============================================================================
-void NVPTXLowerStructArgs::handleStructPtrArgs(Function &F) {
- for (Argument &Arg : F.args()) {
- if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
- handleParam(&Arg);
- }
- }
-// =============================================================================
-// Main function for this pass.
-// =============================================================================
-bool NVPTXLowerStructArgs::runOnFunction(Function &F) {
- // Skip non-kernels. See the comments at the top of this file.
- if (!isKernelFunction(F))
- return false;
- handleStructPtrArgs(F);
- return true;
-FunctionPass *llvm::createNVPTXLowerStructArgsPass() {
- return new NVPTXLowerStructArgs();
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp
index 779b65e..3c98b9f 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -16,11 +16,11 @@ using namespace llvm;
#define DEBUG_TYPE "nvptx-mcexpr"
const NVPTXFloatMCExpr*
-NVPTXFloatMCExpr::Create(VariantKind Kind, APFloat Flt, MCContext &Ctx) {
+NVPTXFloatMCExpr::create(VariantKind Kind, APFloat Flt, MCContext &Ctx) {
return new (Ctx) NVPTXFloatMCExpr(Kind, Flt);
-void NVPTXFloatMCExpr::PrintImpl(raw_ostream &OS) const {
+void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
bool Ignored;
unsigned NumHex;
APFloat APF = getAPFloat();
@@ -47,11 +47,14 @@ void NVPTXFloatMCExpr::PrintImpl(raw_ostream &OS) const {
const NVPTXGenericMCSymbolRefExpr*
-NVPTXGenericMCSymbolRefExpr::Create(const MCSymbolRefExpr *SymExpr,
+NVPTXGenericMCSymbolRefExpr::create(const MCSymbolRefExpr *SymExpr,
MCContext &Ctx) {
return new (Ctx) NVPTXGenericMCSymbolRefExpr(SymExpr);
-void NVPTXGenericMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
- OS << "generic(" << *SymExpr << ")";
+void NVPTXGenericMCSymbolRefExpr::printImpl(raw_ostream &OS,
+ const MCAsmInfo *MAI) const {
+ OS << "generic(";
+ SymExpr->print(OS, MAI);
+ OS << ")";
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index 8c6b219..46b4b33 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -36,17 +36,17 @@ public:
/// @name Construction
/// @{
- static const NVPTXFloatMCExpr *Create(VariantKind Kind, APFloat Flt,
+ static const NVPTXFloatMCExpr *create(VariantKind Kind, APFloat Flt,
MCContext &Ctx);
- static const NVPTXFloatMCExpr *CreateConstantFPSingle(APFloat Flt,
+ static const NVPTXFloatMCExpr *createConstantFPSingle(APFloat Flt,
MCContext &Ctx) {
- return Create(VK_NVPTX_SINGLE_PREC_FLOAT, Flt, Ctx);
+ return create(VK_NVPTX_SINGLE_PREC_FLOAT, Flt, Ctx);
- static const NVPTXFloatMCExpr *CreateConstantFPDouble(APFloat Flt,
+ static const NVPTXFloatMCExpr *createConstantFPDouble(APFloat Flt,
MCContext &Ctx) {
- return Create(VK_NVPTX_DOUBLE_PREC_FLOAT, Flt, Ctx);
+ return create(VK_NVPTX_DOUBLE_PREC_FLOAT, Flt, Ctx);
/// @}
@@ -61,14 +61,14 @@ public:
/// @}
- void PrintImpl(raw_ostream &OS) const override;
- bool EvaluateAsRelocatableImpl(MCValue &Res,
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override {
return false;
void visitUsedExpr(MCStreamer &Streamer) const override {};
- MCSection *FindAssociatedSection() const override { return nullptr; }
+ MCSection *findAssociatedSection() const override { return nullptr; }
// There are no TLS NVPTXMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
@@ -92,7 +92,7 @@ public:
/// @{
static const NVPTXGenericMCSymbolRefExpr
- *Create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx);
+ *create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx);
/// @}
/// @name Accessors
@@ -103,14 +103,14 @@ public:
/// @}
- void PrintImpl(raw_ostream &OS) const override;
- bool EvaluateAsRelocatableImpl(MCValue &Res,
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override {
return false;
void visitUsedExpr(MCStreamer &Streamer) const override {};
- MCSection *FindAssociatedSection() const override { return nullptr; }
+ MCSection *findAssociatedSection() const override { return nullptr; }
// There are no TLS NVPTXMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index ac27c30..a646668 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -53,7 +53,7 @@ void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
-void initializeNVPTXLowerStructArgsPass(PassRegistry &);
+void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
extern "C" void LLVMInitializeNVPTXTarget() {
@@ -69,7 +69,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
- initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry());
+ initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry());
static std::string computeDataLayout(bool is64Bit) {
@@ -163,7 +163,13 @@ void NVPTXPassConfig::addIRPasses() {
+ addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
+ // NVPTXLowerKernelArgs emits alloca for byval parameters which can often
+ // be eliminated by SROA. We do not run SROA right after NVPTXLowerKernelArgs
+ // because we plan to merge NVPTXLowerKernelArgs and
+ // NVPTXFavorNonGenericAddrSpaces into one pass.
+ addPass(createSROAPass());
// FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
// them unused. We could remove dead code in an ad-hoc manner, but that
// requires manual work and might be error-prone.
@@ -181,6 +187,9 @@ void NVPTXPassConfig::addIRPasses() {
// Run NaryReassociate after EarlyCSE/GVN to be more effective.
+ // NaryReassociate on GEPs creates redundant common expressions, so run
+ // EarlyCSE after it.
+ addPass(createEarlyCSEPass());
bool NVPTXPassConfig::addInstSelector() {
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 83de4d9..1736d03 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -24,6 +24,7 @@
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/SourceMgr.h"
@@ -772,7 +773,7 @@ public:
if (const PPCMCExpr *TE = dyn_cast<PPCMCExpr>(Val)) {
int64_t Res;
- if (TE->EvaluateAsConstant(Res))
+ if (TE->evaluateAsConstant(Res))
return CreateContextImm(Res, S, E, IsPPC64);
@@ -814,13 +815,13 @@ addNegOperand(MCInst &Inst, MCOperand &Op, MCContext &Ctx) {
} else if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Expr)) {
if (BinExpr->getOpcode() == MCBinaryExpr::Sub) {
- const MCExpr *NE = MCBinaryExpr::CreateSub(BinExpr->getRHS(),
+ const MCExpr *NE = MCBinaryExpr::createSub(BinExpr->getRHS(),
BinExpr->getLHS(), Ctx);
- Inst.addOperand(MCOperand::createExpr(MCUnaryExpr::CreateMinus(Expr, Ctx)));
+ Inst.addOperand(MCOperand::createExpr(MCUnaryExpr::createMinus(Expr, Ctx)));
void PPCAsmParser::ProcessInstruction(MCInst &Inst,
@@ -1330,7 +1331,7 @@ ExtractModifierFromExpr(const MCExpr *E,
return nullptr;
- return MCSymbolRefExpr::Create(&SRE->getSymbol(), Context);
+ return MCSymbolRefExpr::create(&SRE->getSymbol(), Context);
case MCExpr::Unary: {
@@ -1338,7 +1339,7 @@ ExtractModifierFromExpr(const MCExpr *E,
const MCExpr *Sub = ExtractModifierFromExpr(UE->getSubExpr(), Variant);
if (!Sub)
return nullptr;
- return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context);
+ return MCUnaryExpr::create(UE->getOpcode(), Sub, Context);
case MCExpr::Binary: {
@@ -1362,7 +1363,7 @@ ExtractModifierFromExpr(const MCExpr *E,
return nullptr;
- return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context);
+ return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, Context);
@@ -1396,7 +1397,7 @@ FixupVariantKind(const MCExpr *E) {
return E;
- return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, Context);
+ return MCSymbolRefExpr::create(&SRE->getSymbol(), Variant, Context);
case MCExpr::Unary: {
@@ -1404,7 +1405,7 @@ FixupVariantKind(const MCExpr *E) {
const MCExpr *Sub = FixupVariantKind(UE->getSubExpr());
if (Sub == UE->getSubExpr())
return E;
- return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context);
+ return MCUnaryExpr::create(UE->getOpcode(), Sub, Context);
case MCExpr::Binary: {
@@ -1413,7 +1414,7 @@ FixupVariantKind(const MCExpr *E) {
const MCExpr *RHS = FixupVariantKind(BE->getRHS());
if (LHS == BE->getLHS() && RHS == BE->getRHS())
return E;
- return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context);
+ return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, Context);
@@ -1438,7 +1439,7 @@ ParseExpression(const MCExpr *&EVal) {
PPCMCExpr::VariantKind Variant;
const MCExpr *E = ExtractModifierFromExpr(EVal, Variant);
if (E)
- EVal = PPCMCExpr::Create(Variant, E, false, getParser().getContext());
+ EVal = PPCMCExpr::create(Variant, E, false, getParser().getContext());
return false;
@@ -1485,7 +1486,7 @@ ParseDarwinExpression(const MCExpr *&EVal) {
if (getLexer().isNot(AsmToken::RParen))
return Error(Parser.getTok().getLoc(), "expected ')'");
Parser.Lex(); // Eat the ')'
- EVal = PPCMCExpr::Create(Variant, EVal, false, getParser().getContext());
+ EVal = PPCMCExpr::create(Variant, EVal, false, getParser().getContext());
return false;
@@ -1863,7 +1864,7 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
Error(L, "expected identifier in directive");
return false;
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ MCSymbolELF *Sym = cast<MCSymbolELF>(getContext().getOrCreateSymbol(Name));
if (getLexer().isNot(AsmToken::Comma)) {
Error(L, "unexpected token in directive");
@@ -1936,19 +1937,19 @@ PPCAsmParser::applyModifierToExpr(const MCExpr *E,
MCContext &Ctx) {
switch (Variant) {
case MCSymbolRefExpr::VK_PPC_LO:
- return PPCMCExpr::Create(PPCMCExpr::VK_PPC_LO, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_LO, E, false, Ctx);
case MCSymbolRefExpr::VK_PPC_HI:
- return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HI, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HI, E, false, Ctx);
case MCSymbolRefExpr::VK_PPC_HA:
- return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HA, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HA, E, false, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGHER:
- return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGHERA:
- return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGHEST:
- return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGHESTA:
- return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx);
return nullptr;
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 1a130e8..5e1d227 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -445,6 +445,6 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << *Op.getExpr();
+ Op.getExpr()->print(O, &MAI);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 86885e1..72742dc 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -11,12 +11,12 @@
#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -142,13 +142,14 @@ public:
// to resolve the fixup directly. Emit a relocation and leave
// resolution of the final target address to the linker.
if (const MCSymbolRefExpr *A = Target.getSymA()) {
- const MCSymbolData &Data = Asm.getSymbolData(A->getSymbol());
- // The "other" values are stored in the last 6 bits of the second byte.
- // The traditional defines for STO values assume the full byte and thus
- // the shift to pack it.
- unsigned Other = MCELF::getOther(Data) << 2;
- if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0)
- IsResolved = false;
+ if (const auto *S = dyn_cast<MCSymbolELF>(&A->getSymbol())) {
+ // The "other" values are stored in the last 6 bits of the second
+ // byte. The traditional defines for STO values assume the full byte
+ // and thus the shift to pack it.
+ unsigned Other = S->getOther() << 2;
+ if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0)
+ IsResolved = false;
+ }
@@ -176,7 +177,7 @@ public:
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override {
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
- OW->Write32(0x60000000);
+ OW->write32(0x60000000);
OW->WriteZeros(Count % 4);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 3e3489f..992be5b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -11,9 +11,9 @@
#include "MCTargetDesc/PPCFixupKinds.h"
#include "MCTargetDesc/PPCMCExpr.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
@@ -28,7 +28,7 @@ namespace {
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
- bool needsRelocateWithSymbol(const MCSymbolData &SD,
+ bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
@@ -395,7 +395,7 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
return Type;
-bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
+bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const {
switch (Type) {
@@ -407,7 +407,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
// The "other" values are stored in the last 6 bits of the second byte.
// The traditional defines for STO values assume the full byte and thus
// the shift to pack it.
- unsigned Other = MCELF::getOther(SD) << 2;
+ unsigned Other = cast<MCSymbolELF>(Sym).getOther() << 2;
return (Other & ELF::STO_PPC64_LOCAL_MASK) != 0;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 17f4cd4..9537924 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOpcodes.h"
@@ -116,38 +117,19 @@ public:
switch (Size) {
case 4:
if (IsLittleEndian) {
- OS << (char)(Bits);
- OS << (char)(Bits >> 8);
- OS << (char)(Bits >> 16);
- OS << (char)(Bits >> 24);
+ support::endian::Writer<support::little>(OS).write<uint32_t>(Bits);
} else {
- OS << (char)(Bits >> 24);
- OS << (char)(Bits >> 16);
- OS << (char)(Bits >> 8);
- OS << (char)(Bits);
+ support::endian::Writer<support::big>(OS).write<uint32_t>(Bits);
case 8:
// If we emit a pair of instructions, the first one is
// always in the top 32 bits, even on little-endian.
if (IsLittleEndian) {
- OS << (char)(Bits >> 32);
- OS << (char)(Bits >> 40);
- OS << (char)(Bits >> 48);
- OS << (char)(Bits >> 56);
- OS << (char)(Bits);
- OS << (char)(Bits >> 8);
- OS << (char)(Bits >> 16);
- OS << (char)(Bits >> 24);
+ uint64_t Swapped = (Bits << 32) | (Bits >> 32);
+ support::endian::Writer<support::little>(OS).write<uint64_t>(Swapped);
} else {
- OS << (char)(Bits >> 56);
- OS << (char)(Bits >> 48);
- OS << (char)(Bits >> 40);
- OS << (char)(Bits >> 32);
- OS << (char)(Bits >> 24);
- OS << (char)(Bits >> 16);
- OS << (char)(Bits >> 8);
- OS << (char)(Bits);
+ support::endian::Writer<support::big>(OS).write<uint64_t>(Bits);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 7204bef..6b97d4c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -19,12 +19,12 @@ using namespace llvm;
#define DEBUG_TYPE "ppcmcexpr"
const PPCMCExpr*
-PPCMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+PPCMCExpr::create(VariantKind Kind, const MCExpr *Expr,
bool isDarwin, MCContext &Ctx) {
return new (Ctx) PPCMCExpr(Kind, Expr, isDarwin);
-void PPCMCExpr::PrintImpl(raw_ostream &OS) const {
+void PPCMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
if (isDarwinSyntax()) {
switch (Kind) {
default: llvm_unreachable("Invalid kind!");
@@ -34,10 +34,10 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const {
OS << '(';
- getSubExpr()->print(OS);
+ getSubExpr()->print(OS, MAI);
OS << ')';
} else {
- getSubExpr()->print(OS);
+ getSubExpr()->print(OS, MAI);
switch (Kind) {
default: llvm_unreachable("Invalid kind!");
@@ -53,21 +53,21 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const {
-PPCMCExpr::EvaluateAsConstant(int64_t &Res) const {
+PPCMCExpr::evaluateAsConstant(int64_t &Res) const {
MCValue Value;
- if (!getSubExpr()->EvaluateAsRelocatable(Value, nullptr, nullptr))
+ if (!getSubExpr()->evaluateAsRelocatable(Value, nullptr, nullptr))
return false;
if (!Value.isAbsolute())
return false;
- Res = EvaluateAsInt64(Value.getConstant());
+ Res = evaluateAsInt64(Value.getConstant());
return true;
-PPCMCExpr::EvaluateAsInt64(int64_t Value) const {
+PPCMCExpr::evaluateAsInt64(int64_t Value) const {
switch (Kind) {
case VK_PPC_LO:
return Value & 0xffff;
@@ -90,16 +90,16 @@ PPCMCExpr::EvaluateAsInt64(int64_t Value) const {
-PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+PPCMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const {
MCValue Value;
- if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout, Fixup))
+ if (!getSubExpr()->evaluateAsRelocatable(Value, Layout, Fixup))
return false;
if (Value.isAbsolute()) {
- int64_t Result = EvaluateAsInt64(Value.getConstant());
+ int64_t Result = evaluateAsInt64(Value.getConstant());
if ((Fixup == nullptr || (unsigned)Fixup->getKind() != PPC::fixup_ppc_half16) &&
(Result >= 0x8000))
return false;
@@ -138,7 +138,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
Modifier = MCSymbolRefExpr::VK_PPC_HIGHESTA;
- Sym = MCSymbolRefExpr::Create(&Sym->getSymbol(), Modifier, Context);
+ Sym = MCSymbolRefExpr::create(&Sym->getSymbol(), Modifier, Context);
Res = MCValue::get(Sym, Value.getSymB(), Value.getConstant());
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index ca72ccf..a641780 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -34,7 +34,7 @@ private:
const MCExpr *Expr;
bool IsDarwin;
- int64_t EvaluateAsInt64(int64_t Value) const;
+ int64_t evaluateAsInt64(int64_t Value) const;
explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr, bool IsDarwin)
: Kind(Kind), Expr(Expr), IsDarwin(IsDarwin) {}
@@ -43,22 +43,22 @@ public:
/// @name Construction
/// @{
- static const PPCMCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ static const PPCMCExpr *create(VariantKind Kind, const MCExpr *Expr,
bool isDarwin, MCContext &Ctx);
- static const PPCMCExpr *CreateLo(const MCExpr *Expr,
+ static const PPCMCExpr *createLo(const MCExpr *Expr,
bool isDarwin, MCContext &Ctx) {
- return Create(VK_PPC_LO, Expr, isDarwin, Ctx);
+ return create(VK_PPC_LO, Expr, isDarwin, Ctx);
- static const PPCMCExpr *CreateHi(const MCExpr *Expr,
+ static const PPCMCExpr *createHi(const MCExpr *Expr,
bool isDarwin, MCContext &Ctx) {
- return Create(VK_PPC_HI, Expr, isDarwin, Ctx);
+ return create(VK_PPC_HI, Expr, isDarwin, Ctx);
- static const PPCMCExpr *CreateHa(const MCExpr *Expr,
+ static const PPCMCExpr *createHa(const MCExpr *Expr,
bool isDarwin, MCContext &Ctx) {
- return Create(VK_PPC_HA, Expr, isDarwin, Ctx);
+ return create(VK_PPC_HA, Expr, isDarwin, Ctx);
/// @}
@@ -77,19 +77,19 @@ public:
/// @}
- void PrintImpl(raw_ostream &OS) const override;
- bool EvaluateAsRelocatableImpl(MCValue &Res,
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *FindAssociatedSection() const override {
- return getSubExpr()->FindAssociatedSection();
+ MCSection *findAssociatedSection() const override {
+ return getSubExpr()->findAssociatedSection();
// There are no TLS PPCMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
- bool EvaluateAsConstant(int64_t &Res) const;
+ bool evaluateAsConstant(int64_t &Res) const;
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 8474376..1e8e804 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -16,14 +16,14 @@
#include "PPCMCAsmInfo.h"
#include "PPCTargetStreamer.h"
#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -70,8 +70,8 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
-static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
- Triple TheTriple(TT);
+static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
+ const Triple &TheTriple) {
bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
TheTriple.getArch() == Triple::ppc64le);
@@ -132,8 +132,14 @@ public:
void emitAbiVersion(int AbiVersion) override {
OS << "\t.abiversion " << AbiVersion << '\n';
- void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
- OS << "\t.localentry\t" << *S << ", " << *LocalOffset << '\n';
+ void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
+ const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
+ OS << "\t.localentry\t";
+ S->print(OS, MAI);
+ OS << ", ";
+ LocalOffset->print(OS, MAI);
+ OS << '\n';
@@ -159,25 +165,21 @@ public:
Flags |= (AbiVersion & ELF::EF_PPC64_ABI);
- void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
+ void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
MCAssembler &MCA = getStreamer().getAssembler();
- MCSymbolData &Data = getStreamer().getOrCreateSymbolData(S);
int64_t Res;
- if (!LocalOffset->EvaluateAsAbsolute(Res, MCA))
+ if (!LocalOffset->evaluateAsAbsolute(Res, MCA))
report_fatal_error(".localentry expression must be absolute.");
unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res);
if (Res != ELF::decodePPC64LocalEntryOffset(Encoded))
report_fatal_error(".localentry expression cannot be encoded.");
- // The "other" values are stored in the last 6 bits of the second byte.
- // The traditional defines for STO values assume the full byte and thus
- // the shift to pack it.
- unsigned Other = MCELF::getOther(Data) << 2;
+ unsigned Other = S->getOther();
Other |= Encoded;
- MCELF::setOther(Data, Other >> 2);
+ S->setOther(Other);
// For GAS compatibility, unless we already saw a .abiversion directive,
// set e_flags to indicate ELFv2 ABI.
@@ -185,22 +187,18 @@ public:
if ((Flags & ELF::EF_PPC64_ABI) == 0)
MCA.setELFHeaderEFlags(Flags | 2);
- void emitAssignment(MCSymbol *Symbol, const MCExpr *Value) override {
+ void emitAssignment(MCSymbol *S, const MCExpr *Value) override {
+ auto *Symbol = cast<MCSymbolELF>(S);
// When encoding an assignment to set symbol A to symbol B, also copy
// the st_other bits encoding the local entry point offset.
if (Value->getKind() != MCExpr::SymbolRef)
- const MCSymbol &RhsSym =
- static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
- MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym);
- MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol);
- // The "other" values are stored in the last 6 bits of the second byte.
- // The traditional defines for STO values assume the full byte and thus
- // the shift to pack it.
- unsigned Other = MCELF::getOther(SymbolData) << 2;
+ const auto &RhsSym = cast<MCSymbolELF>(
+ static_cast<const MCSymbolRefExpr *>(Value)->getSymbol());
+ unsigned Other = Symbol->getOther();
- Other |= (MCELF::getOther(Data) << 2) & ELF::STO_PPC64_LOCAL_MASK;
- MCELF::setOther(SymbolData, Other >> 2);
+ Other |= RhsSym.getOther() & ELF::STO_PPC64_LOCAL_MASK;
+ Symbol->setOther(Other);
@@ -217,7 +215,7 @@ public:
void emitAbiVersion(int AbiVersion) override {
llvm_unreachable("Unknown pseudo-op: .abiversion");
- void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
+ void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
llvm_unreachable("Unknown pseudo-op: .localentry");
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index 3c906d2..9d72896 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
namespace {
class PPCMachObjectWriter : public MCMachObjectTargetWriter {
- bool RecordScatteredRelocation(MachObjectWriter *Writer,
+ bool recordScatteredRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -38,10 +38,9 @@ class PPCMachObjectWriter : public MCMachObjectTargetWriter {
PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype)
- : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
- /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
- void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+ void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override {
@@ -187,9 +186,9 @@ static uint32_t getFixupOffset(const MCAsmLayout &Layout,
/// \return false if falling back to using non-scattered relocation,
/// otherwise true for normal scattered relocation.
-/// based on X86MachObjectWriter::RecordScatteredRelocation
-/// and ARMMachObjectWriter::RecordScatteredRelocation
-bool PPCMachObjectWriter::RecordScatteredRelocation(
+/// based on X86MachObjectWriter::recordScatteredRelocation
+/// and ARMMachObjectWriter::recordScatteredRelocation
+bool PPCMachObjectWriter::recordScatteredRelocation(
MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
unsigned Log2Size, uint64_t &FixedValue) {
@@ -206,28 +205,26 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
- if (!A_SD->getFragment())
+ if (!A->getFragment())
report_fatal_error("symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
uint32_t Value = Writer->getSymbolAddress(*A, Layout);
- uint64_t SecAddr =
- Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
FixedValue += SecAddr;
uint32_t Value2 = 0;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
- const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+ const MCSymbol *SB = &B->getSymbol();
- if (!B_SD->getFragment())
+ if (!SB->getFragment())
report_fatal_error("symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
// FIXME: is Type correct? see include/llvm/Support/MachO.h
Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
- FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
// FIXME: does FixedValue get used??
@@ -253,7 +250,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
// Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()?
- // see PPCMCExpr::EvaluateAsRelocatableImpl()
+ // see PPCMCExpr::evaluateAsRelocatableImpl()
uint32_t other_half = 0;
switch (Type) {
@@ -317,7 +314,7 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// Q: are branch targets ever scattered?
RelocType != MachO::PPC_RELOC_BR24 &&
RelocType != MachO::PPC_RELOC_BR14) {
- RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ recordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
Log2Size, FixedValue);
@@ -346,7 +343,7 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// Resolve constant variables.
if (A->isVariable()) {
int64_t Res;
- if (A->getVariableValue()->EvaluateAsAbsolute(
+ if (A->getVariableValue()->evaluateAsAbsolute(
Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 4f1c3c7..b42b0f9 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -49,7 +49,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
@@ -181,14 +181,14 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
+ MO.getMBB()->getSymbol()->print(O, MAI);
case MachineOperand::MO_ConstantPoolIndex:
O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
<< '_' << MO.getIndex();
case MachineOperand::MO_BlockAddress:
- O << *GetBlockAddressSymbol(MO.getBlockAddress());
+ GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI);
case MachineOperand::MO_GlobalAddress: {
// Computing the address of a global symbol, not calling it.
@@ -222,8 +222,8 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
} else {
SymToPrint = getSymbol(GV);
- O << *SymToPrint;
+ SymToPrint->print(O, MAI);
printOffset(MO.getOffset(), O);
@@ -422,11 +422,11 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
TM.getRelocationModel() == Reloc::PIC_)
Kind = MCSymbolRefExpr::VK_PLT;
const MCSymbolRefExpr *TlsRef =
- MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext);
+ MCSymbolRefExpr::create(TlsGetAddr, Kind, OutContext);
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, VK, OutContext);
+ const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, VK, OutContext);
MCInstBuilder(Subtarget->isPPC64() ?
@@ -464,10 +464,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *GOTSymbol =
const MCExpr *OffsExpr =
- MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol,
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(GOTSymbol,
- MCConstantExpr::Create(4, OutContext),
+ MCConstantExpr::create(4, OutContext),
// Emit the 'bl'.
@@ -486,7 +486,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL)
// FIXME: We would like an efficient form for this, so we don't have to do
// a lot of extra uniquing.
- .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
+ .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
// Emit the label.
@@ -502,9 +502,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
- MCSymbolRefExpr::Create(PICOffset, MCSymbolRefExpr::VK_None, OutContext);
+ MCSymbolRefExpr::create(PICOffset, MCSymbolRefExpr::VK_None, OutContext);
const MCExpr *PB =
- MCSymbolRefExpr::Create(MF->getPICBaseSymbol(),
+ MCSymbolRefExpr::create(MF->getPICBaseSymbol(),
const MCOperand TR = TmpInst.getOperand(1);
@@ -512,7 +512,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Step 1: lwz %Rt, .L$poff - .L$pb(%Ri)
TmpInst.getOperand(1) =
- MCOperand::createExpr(MCBinaryExpr::CreateSub(Exp, PB, OutContext));
+ MCOperand::createExpr(MCBinaryExpr::createSub(Exp, PB, OutContext));
TmpInst.getOperand(0) = TR;
TmpInst.getOperand(2) = PICR;
EmitToStreamer(*OutStreamer, TmpInst);
@@ -547,19 +547,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (PL == PICLevel::Small) {
const MCExpr *Exp =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_GOT,
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_GOT,
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
} else {
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
- MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_None,
+ MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None,
const MCExpr *PB =
- MCSymbolRefExpr::Create(OutContext.getOrCreateSymbol(Twine(".LTOC")),
+ MCSymbolRefExpr::create(OutContext.getOrCreateSymbol(Twine(".LTOC")),
- Exp = MCBinaryExpr::CreateSub(Exp, PB, OutContext);
+ Exp = MCBinaryExpr::createSub(Exp, PB, OutContext);
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
@@ -592,7 +592,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
- MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
+ MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
@@ -639,7 +639,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
@@ -681,7 +681,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
@@ -715,7 +715,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
@@ -729,7 +729,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTprel =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
@@ -748,7 +748,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *Exp =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
@@ -763,10 +763,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL)
// FIXME: We would like an efficient form for this, so we don't have to do
// a lot of extra uniquing.
- .addExpr(MCSymbolRefExpr::Create(NextInstr, OutContext)));
+ .addExpr(MCSymbolRefExpr::create(NextInstr, OutContext)));
const MCExpr *OffsExpr =
- MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, OutContext),
- MCSymbolRefExpr::Create(GOTRef, OutContext),
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(GOTSymbol, OutContext),
+ MCSymbolRefExpr::create(GOTRef, OutContext),
OutStreamer->EmitValue(OffsExpr, 4);
@@ -786,10 +786,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::PPC32GOT: {
MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
const MCExpr *SymGotTlsL =
- MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO,
+ MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO,
const MCExpr *SymGotTlsHA =
- MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA,
+ MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA,
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI)
@@ -808,7 +808,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA,
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA,
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
@@ -825,7 +825,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *SymGotTlsGD = MCSymbolRefExpr::Create(
+ const MCExpr *SymGotTlsGD = MCSymbolRefExpr::create(
MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO
: MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
@@ -853,7 +853,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA,
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA,
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
@@ -870,7 +870,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *SymGotTlsLD = MCSymbolRefExpr::Create(
+ const MCExpr *SymGotTlsLD = MCSymbolRefExpr::create(
MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO
: MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
@@ -900,7 +900,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
@@ -920,7 +920,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
@@ -1012,8 +1012,8 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
// The GOT pointer points to the middle of the GOT, in order to reference the
// entire 64kB range. 0x8000 is the midpoint.
const MCExpr *tocExpr =
- MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(CurrentPos, OutContext),
- MCConstantExpr::Create(0x8000, OutContext),
+ MCBinaryExpr::createAdd(MCSymbolRefExpr::create(CurrentPos, OutContext),
+ MCConstantExpr::create(0x8000, OutContext),
OutStreamer->EmitAssignment(TOCSym, tocExpr);
@@ -1036,10 +1036,10 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
const MCExpr *OffsExpr =
- MCBinaryExpr::CreateSub(
- MCSymbolRefExpr::Create(OutContext.getOrCreateSymbol(Twine(".LTOC")),
+ MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(OutContext.getOrCreateSymbol(Twine(".LTOC")),
- MCSymbolRefExpr::Create(PICBase, OutContext),
+ MCSymbolRefExpr::create(PICBase, OutContext),
OutStreamer->EmitValue(OffsExpr, 4);
@@ -1062,12 +1062,12 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
MCSymbol *Symbol1 = CurrentFnSymForSize;
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
- OutStreamer->EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
+ OutStreamer->EmitValue(MCSymbolRefExpr::create(Symbol1, OutContext),
8 /*size*/);
MCSymbol *Symbol2 = OutContext.getOrCreateSymbol(StringRef(".TOC."));
// Generates a R_PPC64_TOC relocation for TOC base insertion.
- MCSymbolRefExpr::Create(Symbol2, MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext),
+ MCSymbolRefExpr::create(Symbol2, MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext),
// Emit a null environment pointer.
OutStreamer->EmitIntValue(0, 8 /* size */);
@@ -1133,22 +1133,22 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
MCSymbol *GlobalEntryLabel = OutContext.createTempSymbol();
const MCSymbolRefExpr *GlobalEntryLabelExp =
- MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext);
+ MCSymbolRefExpr::create(GlobalEntryLabel, OutContext);
MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
const MCExpr *TOCDeltaExpr =
- MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext),
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
GlobalEntryLabelExp, OutContext);
const MCExpr *TOCDeltaHi =
- PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext);
+ PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
const MCExpr *TOCDeltaLo =
- PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext);
+ PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
@@ -1157,16 +1157,16 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
MCSymbol *LocalEntryLabel = OutContext.createTempSymbol();
const MCSymbolRefExpr *LocalEntryLabelExp =
- MCSymbolRefExpr::Create(LocalEntryLabel, OutContext);
+ MCSymbolRefExpr::create(LocalEntryLabel, OutContext);
const MCExpr *LocalOffsetExp =
- MCBinaryExpr::CreateSub(LocalEntryLabelExp,
+ MCBinaryExpr::createSub(LocalEntryLabelExp,
GlobalEntryLabelExp, OutContext);
PPCTargetStreamer *TS =
static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
if (TS)
- TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp);
+ TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), LocalOffsetExp);
@@ -1305,10 +1305,10 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
- const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext);
- const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
+ const MCExpr *Anon = MCSymbolRefExpr::create(AnonSymbol, OutContext);
+ const MCExpr *LazyPtrExpr = MCSymbolRefExpr::create(LazyPtr, OutContext);
const MCExpr *Sub =
- MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext);
+ MCBinaryExpr::createSub(LazyPtrExpr, Anon, OutContext);
// mflr r0
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
@@ -1318,7 +1318,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
// mflr r11
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
// addis r11, r11, ha16(LazyPtr - AnonSymbol)
- const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, true, OutContext);
+ const MCExpr *SubHa16 = PPCMCExpr::createHa(Sub, true, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
@@ -1328,7 +1328,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
// ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
// lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
- const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, true, OutContext);
+ const MCExpr *SubLo16 = PPCMCExpr::createLo(Sub, true, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
@@ -1364,7 +1364,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
MCSymbol *Stub = Stubs[i].first;
MCSymbol *RawSym = Stubs[i].second.getPointer();
MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
- const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
+ const MCExpr *LazyPtrExpr = MCSymbolRefExpr::create(LazyPtr, OutContext);
@@ -1373,7 +1373,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
// lis r11, ha16(LazyPtr)
const MCExpr *LazyPtrHa16 =
- PPCMCExpr::CreateHa(LazyPtrExpr, true, OutContext);
+ PPCMCExpr::createHa(LazyPtrExpr, true, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LIS)
@@ -1381,7 +1381,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
// ldu r12, lo16(LazyPtr)(r11)
// lwzu r12, lo16(LazyPtr)(r11)
const MCExpr *LazyPtrLo16 =
- PPCMCExpr::CreateLo(LazyPtrExpr, true, OutContext);
+ PPCMCExpr::createLo(LazyPtrExpr, true, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
@@ -1465,7 +1465,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// need to be indirect and pc-rel. We accomplish this by using NLPs.
// However, sometimes the types are local to the file. So we need to
// fill in the value for the NLP in those cases.
- OutStreamer->EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutStreamer->EmitValue(MCSymbolRefExpr::create(MCSym.getPointer(),
isPPC64 ? 8 : 4/*size*/);
@@ -1484,7 +1484,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// .long _foo
- Create(Stubs[i].second.getPointer(),
+ create(Stubs[i].second.getPointer(),
isPPC64 ? 8 : 4/*size*/);
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 0b8e23c..a561d5b 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -148,6 +148,9 @@ class PPCFastISel final : public FastISel {
bool isVSFRCRegister(unsigned Register) const {
return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID;
+ bool isVSSRCRegister(unsigned Register) const {
+ return MRI.getRegClass(Register)->getID() == PPC::VSSRCRegClassID;
+ }
bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt, unsigned DestReg);
bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
@@ -503,8 +506,11 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
// If this is a potential VSX load with an offset of 0, a VSX indexed load can
// be used.
+ bool IsVSSRC = (ResultReg != 0) && isVSSRCRegister(ResultReg);
bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg);
- if (IsVSFRC && (Opc == PPC::LFD) &&
+ bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
+ bool Is64VSXLoad = IsVSSRC && Opc == PPC::LFD;
+ if ((Is32VSXLoad || Is64VSXLoad) &&
(Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
(Addr.Offset == 0)) {
UseOffset = false;
@@ -518,7 +524,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
// into a RegBase.
if (Addr.BaseType == Address::FrameIndexBase) {
// VSX only provides an indexed load.
- if (IsVSFRC && Opc == PPC::LFD) return false;
+ if (Is32VSXLoad || Is64VSXLoad) return false;
MachineMemOperand *MMO =
@@ -532,7 +538,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
// Base reg with offset in range.
} else if (UseOffset) {
// VSX only provides an indexed load.
- if (IsVSFRC && Opc == PPC::LFD) return false;
+ if (Is32VSXLoad || Is64VSXLoad) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
@@ -555,7 +561,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
case PPC::LWA: Opc = PPC::LWAX; break;
case PPC::LWA_32: Opc = PPC::LWAX_32; break;
case PPC::LD: Opc = PPC::LDX; break;
- case PPC::LFS: Opc = PPC::LFSX; break;
+ case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
@@ -636,9 +642,12 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
// If this is a potential VSX store with an offset of 0, a VSX indexed store
// can be used.
+ bool IsVSSRC = isVSSRCRegister(SrcReg);
bool IsVSFRC = isVSFRCRegister(SrcReg);
- if (IsVSFRC && (Opc == PPC::STFD) &&
- (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
+ bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
+ bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
+ if ((Is32VSXStore || Is64VSXStore) &&
+ (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
(Addr.Offset == 0)) {
UseOffset = false;
@@ -648,7 +657,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
// into a RegBase.
if (Addr.BaseType == Address::FrameIndexBase) {
// VSX only provides an indexed store.
- if (IsVSFRC && Opc == PPC::STFD) return false;
+ if (Is32VSXStore || Is64VSXStore) return false;
MachineMemOperand *MMO =
@@ -665,7 +674,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
// Base reg with offset in range.
} else if (UseOffset) {
// VSX only provides an indexed store.
- if (IsVSFRC && Opc == PPC::STFD) return false;
+ if (Is32VSXStore || Is64VSXStore) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
@@ -684,7 +693,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
case PPC::STH8: Opc = PPC::STHX8; break;
case PPC::STW8: Opc = PPC::STWX8; break;
case PPC::STD: Opc = PPC::STDX; break;
- case PPC::STFS: Opc = PPC::STFSX; break;
+ case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index bb9315e..2600ee5 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10825,7 +10825,8 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
// PPC does not allow r+i addressing modes for vectors!
if (Ty->isVectorTy() && AM.BaseOffs != 0)
return false;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index c93de43..7fd3f9c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -556,7 +556,8 @@ namespace llvm {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 9685bac..d08b808 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1078,6 +1078,82 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xssubsp $XT, $XA, $XB", IIC_VecFP,
[(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
+ // FMA Instructions
+ let BaseName = "XSMADDASP" in {
+ let isCommutable = 1 in
+ def XSMADDASP : XX3Form<60, 1,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ let IsVSXFMAAlt = 1 in
+ def XSMADDMSP : XX3Form<60, 9,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ }
+ let BaseName = "XSMSUBASP" in {
+ let isCommutable = 1 in
+ def XSMSUBASP : XX3Form<60, 17,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fma f32:$XA, f32:$XB,
+ (fneg f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ let IsVSXFMAAlt = 1 in
+ def XSMSUBMSP : XX3Form<60, 25,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ }
+ let BaseName = "XSNMADDASP" in {
+ let isCommutable = 1 in
+ def XSNMADDASP : XX3Form<60, 129,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+ f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ let IsVSXFMAAlt = 1 in
+ def XSNMADDMSP : XX3Form<60, 137,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ }
+ let BaseName = "XSNMSUBASP" in {
+ let isCommutable = 1 in
+ def XSNMSUBASP : XX3Form<60, 145,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+ (fneg f32:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ let IsVSXFMAAlt = 1 in
+ def XSNMSUBMSP : XX3Form<60, 153,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ }
} // AddedComplexity = 400
} // HasP8Vector
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index f1e2865..05cb6e1 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -142,28 +142,28 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)
RefKind = MCSymbolRefExpr::VK_PLT;
- const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx);
+ const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
if (!MO.isJTI() && MO.getOffset())
- Expr = MCBinaryExpr::CreateAdd(Expr,
- MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Expr = MCBinaryExpr::createAdd(Expr,
+ MCConstantExpr::create(MO.getOffset(), Ctx),
// Subtract off the PIC base if required.
if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) {
const MachineFunction *MF = MO.getParent()->getParent()->getParent();
- const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
- Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx);
+ const MCExpr *PB = MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
+ Expr = MCBinaryExpr::createSub(Expr, PB, Ctx);
// Add ha16() / lo16() markers if required.
switch (access) {
case PPCII::MO_LO:
- Expr = PPCMCExpr::CreateLo(Expr, isDarwin, Ctx);
+ Expr = PPCMCExpr::createLo(Expr, isDarwin, Ctx);
case PPCII::MO_HA:
- Expr = PPCMCExpr::CreateHa(Expr, isDarwin, Ctx);
+ Expr = PPCMCExpr::createHa(Expr, isDarwin, Ctx);
@@ -193,7 +193,7 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
MCOp = MCOperand::createImm(MO.getImm());
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
MO.getMBB()->getSymbol(), AP.OutContext));
case MachineOperand::MO_GlobalAddress:
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index 9ad1340..9ee5db9 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -55,9 +55,9 @@ MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
const MCExpr *PPC64LinuxTargetObjectFile::
getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
const MCExpr *Expr =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_PPC_DTPREL, getContext());
- return MCBinaryExpr::CreateAdd(Expr,
- MCConstantExpr::Create(0x8000, getContext()),
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_PPC_DTPREL, getContext());
+ return MCBinaryExpr::createAdd(Expr,
+ MCConstantExpr::create(0x8000, getContext()),
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index 8aaf5e1..dbe7617 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -20,7 +20,7 @@ public:
virtual void emitTCEntry(const MCSymbol &S) = 0;
virtual void emitMachine(StringRef CPU) = 0;
virtual void emitAbiVersion(int AbiVersion) = 0;
- virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) = 0;
+ virtual void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) = 0;
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 9b36063..0a05d25 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -105,7 +105,7 @@ namespace ShaderType {
/// a separate piece of memory that is unique from other
/// memory locations.
namespace AMDGPUAS {
-enum AddressSpaces {
+enum AddressSpaces : unsigned {
PRIVATE_ADDRESS = 0, ///< Address space for private memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
CONSTANT_ADDRESS = 2, ///< Address space for constant memory
@@ -137,7 +137,10 @@ enum AddressSpaces {
ADDRESS_NONE = 24, ///< Address space for unknown memory.
+ // Some places use this if the address space can't be determined.
} // namespace AMDGPUAS
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index d00ae78..d56838e 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -257,9 +257,22 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
+ setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
+ setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs) {
@@ -301,6 +314,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ setOperationAction(ISD::SMIN, MVT::i32, Legal);
+ setOperationAction(ISD::UMIN, MVT::i32, Legal);
+ setOperationAction(ISD::SMAX, MVT::i32, Legal);
+ setOperationAction(ISD::UMAX, MVT::i32, Legal);
if (!Subtarget->hasFFBH())
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
@@ -962,17 +980,17 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case AMDGPUIntrinsic::AMDGPU_imax:
- return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
+ return DAG.getNode(ISD::SMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_umax:
- return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
+ return DAG.getNode(ISD::UMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_imin:
- return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
+ return DAG.getNode(ISD::SMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_umin:
- return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
+ return DAG.getNode(ISD::UMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_umul24:
return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT,
@@ -1050,7 +1068,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
+ return DAG.getNode(ISD::SMAX, DL, VT, Neg, Op.getOperand(1));
/// Linear Interpolation
@@ -1149,7 +1167,7 @@ SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
return SDValue();
-/// \brief Generate Min/Max node
+// FIXME: Remove this when combines added to DAGCombiner.
SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
SDValue LHS,
@@ -1165,22 +1183,22 @@ SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
switch (CCOpcode) {
case ISD::SETULT: {
- unsigned Opc = (LHS == True) ? AMDGPUISD::UMIN : AMDGPUISD::UMAX;
+ unsigned Opc = (LHS == True) ? ISD::UMIN : ISD::UMAX;
return DAG.getNode(Opc, DL, VT, LHS, RHS);
case ISD::SETLE:
case ISD::SETLT: {
- unsigned Opc = (LHS == True) ? AMDGPUISD::SMIN : AMDGPUISD::SMAX;
+ unsigned Opc = (LHS == True) ? ISD::SMIN : ISD::SMAX;
return DAG.getNode(Opc, DL, VT, LHS, RHS);
case ISD::SETGT:
case ISD::SETGE: {
- unsigned Opc = (LHS == True) ? AMDGPUISD::SMAX : AMDGPUISD::SMIN;
+ unsigned Opc = (LHS == True) ? ISD::SMAX : ISD::SMIN;
return DAG.getNode(Opc, DL, VT, LHS, RHS);
case ISD::SETUGT: {
- unsigned Opc = (LHS == True) ? AMDGPUISD::UMAX : AMDGPUISD::UMIN;
+ unsigned Opc = (LHS == True) ? ISD::UMAX : ISD::UMIN;
return DAG.getNode(Opc, DL, VT, LHS, RHS);
@@ -2644,11 +2662,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -2794,14 +2808,6 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
- computeKnownBitsForMinMax(Op.getOperand(0), Op.getOperand(1),
- KnownZero, KnownOne, DAG, Depth);
- break;
KnownZero = APInt::getHighBitsSet(32, 31);
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index c9f1981..fbb7d3c 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -228,11 +228,7 @@ enum NodeType : unsigned {
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index f0f10ca..64e295f 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -152,17 +152,15 @@ bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const
return true;
-MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- int FrameIndex) const {
+MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
// TODO: Implement this function
return nullptr;
-MachineInstr *
-AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- MachineInstr *LoadMI) const {
+MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const {
// TODO: Implement this function
return nullptr;
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index 07042b5..8fd27a1 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -87,9 +87,11 @@ public:
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
int FrameIndex) const override;
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
MachineInstr *LoadMI) const override;
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index 790f34c..b413897 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -94,16 +94,6 @@ def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
-// out = min(a, b) a and b are signed ints
-def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
-// out = min(a, b) a and b are unsigned ints
-def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
// FIXME: TableGen doesn't like commutative instructions with more
// than 2 operands.
// out = max(a, b, c) a, b and c are floats
diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
index 9565e3f..2083146 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -64,25 +64,25 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::createReg(MO.getReg());
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
MO.getMBB()->getSymbol(), Ctx));
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(Sym, Ctx));
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx));
case MachineOperand::MO_TargetIndex: {
assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START);
MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
- const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
MCOp = MCOperand::createExpr(Expr);
case MachineOperand::MO_ExternalSymbol: {
MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
- const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
MCOp = MCOperand::createExpr(Expr);
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index b262cdf..a5a901c 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -272,7 +272,7 @@ public:
bool enableSubRegLiveness() const override {
- return false;
+ return true;
diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
index 19bffd5..95025a6 100644
--- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
@@ -1084,7 +1084,7 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
case AsmToken::Identifier:
- MCSymbolRefExpr::Create(getContext().getOrCreateSymbol(
+ MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
Parser.getTok().getString()), getContext()), S));
return MatchOperand_Success;
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index 279c3eb..f706769 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -337,7 +337,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else if (Op.isExpr()) {
const MCExpr *Exp = Op.getExpr();
- Exp->print(O);
+ Exp->print(O, &MAI);
} else {
llvm_unreachable("unknown operand type in printOperand");
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index 2605ca5..3713223 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -25,18 +25,18 @@ namespace {
class AMDGPUMCObjectWriter : public MCObjectWriter {
AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {}
- void ExecutePostLayoutBinding(MCAssembler &Asm,
+ void executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override {
//XXX: Implement if necessary.
- void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+ void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override {
assert(!"Not implemented");
- void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+ void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
@@ -64,7 +64,7 @@ public:
} //End anonymous namespace
-void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
+void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
Asm.writeSectionData(&*I, Layout);
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 19d89fb..028a86d 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -11,7 +11,7 @@
#include "AMDGPUMCAsmInfo.h"
using namespace llvm;
-AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfoELF() {
+AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
HasSingleParameterDotFile = false;
MaxInstLength = 16;
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
index 8f75c76..a5bac51 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -17,7 +17,7 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
-class StringRef;
+class Triple;
// If you need to create another MCAsmInfo class, which inherits from MCAsmInfo,
// you will need to make sure your new class sets PrivateGlobalPrefix to
@@ -26,7 +26,7 @@ class StringRef;
// with 'L' as a local symbol.
class AMDGPUMCAsmInfo : public MCAsmInfoELF {
- explicit AMDGPUMCAsmInfo(StringRef &TT);
+ explicit AMDGPUMCAsmInfo(const Triple &TT);
} // namespace llvm
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index a809564..e683498 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -148,15 +149,11 @@ void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
- for (unsigned i = 0; i < 4; i++) {
- OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
- }
+ support::endian::Writer<support::little>(OS).write(Value);
void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
- for (unsigned i = 0; i < 8; i++) {
- EmitByte((Value >> (8 * i)) & 0xff, OS);
- }
+ support::endian::Writer<support::little>(OS).write(Value);
unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
diff --git a/lib/Target/R600/ b/lib/Target/R600/
index 7126c82..7beed09 100644
--- a/lib/Target/R600/
+++ b/lib/Target/R600/
@@ -781,10 +781,10 @@ def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
-def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>;
-def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>;
-def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>;
-def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;
+def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", smax>;
+def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", smin>;
+def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", umax>;
+def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", umin>;
def SETE_INT : R600_2OP <
0x3A, "SETE_INT",
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 52bf2ae..12d08cf 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -155,7 +155,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
@@ -211,6 +210,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
+ setTargetDAGCombine(ISD::SMIN);
+ setTargetDAGCombine(ISD::SMAX);
+ setTargetDAGCombine(ISD::UMIN);
+ setTargetDAGCombine(ISD::UMAX);
@@ -251,47 +254,83 @@ bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
return false;
-// FIXME: This really needs an address space argument. The immediate offset
-// size is different for different sets of memory instruction sets.
-// The single offset DS instructions have a 16-bit unsigned byte offset.
-// MUBUF / MTBUF have a 12-bit unsigned byte offset, and additionally can do r +
-// r + i with addr64. 32-bit has more addressing mode options. Depending on the
-// resource constant, it can also do (i64 r0) + (i32 r1) * (i14 i).
-// SMRD instructions have an 8-bit, dword offset.
bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty, unsigned AS) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
- // Allow a 16-bit unsigned immediate field, since this is what DS instructions
- // use.
- if (!isUInt<16>(AM.BaseOffs))
- return false;
+ switch (AS) {
+ case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
+ // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
+ // additionally can do r + r + i with addr64. 32-bit has more addressing
+ // mode options. Depending on the resource constant, it can also do
+ // (i64 r0) + (i32 r1) * (i14 i).
+ //
+ // SMRD instructions have an 8-bit, dword offset.
+ //
+ // Assume nonunifom access, since the address space isn't enough to know
+ // what instruction we will use, and since we don't know if this is a load
+ // or store and scalar stores are only available on VI.
+ //
+ // We also know if we are doing an extload, we can't do a scalar load.
+ //
+ // Private arrays end up using a scratch buffer most of the time, so also
+ // assume those use MUBUF instructions. Scratch loads / stores are currently
+ // implemented as mubuf instructions with offen bit set, so slightly
+ // different than the normal addr64.
+ if (!isUInt<12>(AM.BaseOffs))
+ return false;
- // Only support r+r,
- switch (AM.Scale) {
- case 0: // "r+i" or just "i", depending on HasBaseReg.
- break;
- case 1:
- if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ // FIXME: Since we can split immediate into soffset and immediate offset,
+ // would it make sense to allow any immediate?
+ switch (AM.Scale) {
+ case 0: // r + i or just i, depending on HasBaseReg.
+ return true;
+ case 1:
+ return true; // We have r + r or r + i.
+ case 2:
+ if (AM.HasBaseReg) {
+ // Reject 2 * r + r.
+ return false;
+ }
+ // Allow 2 * r as r + r
+ // Or 2 * r + i is allowed as r + r + i.
+ return true;
+ default: // Don't allow n * r
return false;
- // Otherwise we have r+r or r+i.
- break;
- case 2:
- if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ }
+ }
+ // Basic, single offset DS instructions allow a 16-bit unsigned immediate
+ // field.
+ // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
+ // an 8-bit dword offset but we don't know the alignment here.
+ if (!isUInt<16>(AM.BaseOffs))
return false;
- // Allow 2*r as r+r.
- break;
- default: // Don't allow n * r
+ if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
+ return true;
+ if (AM.Scale == 1 && AM.HasBaseReg)
+ return true;
return false;
- return true;
+ // Flat instructions do not have offsets, and only have the register
+ // address.
+ return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
+ }
+ default:
+ llvm_unreachable("unhandled address space");
+ }
bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
@@ -368,6 +407,12 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return TII->isInlineConstant(Imm);
+static EVT toIntegerVT(EVT VT) {
+ if (VT.isVector())
+ return VT.changeVectorElementTypeToInteger();
+ return MVT::getIntegerVT(VT.getSizeInBits());
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDLoc SL, SDValue Chain,
unsigned Offset, bool Signed) const {
@@ -380,20 +425,42 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
- SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
- MRI.getLiveInVirtReg(InputPtrReg), MVT::i64);
- SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, BasePtr,
- DAG.getConstant(Offset, SL, MVT::i64));
+ SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
+ MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+ DAG.getConstant(Offset, SL, PtrVT));
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+ unsigned Align = DL->getABITypeAlignment(Ty);
+ if (VT != MemVT && VT.isFloatingPoint()) {
+ // Do an integer load and convert.
+ // FIXME: This is mostly because load legalization after type legalization
+ // doesn't handle FP extloads.
+ assert(VT.getScalarType() == MVT::f32 &&
+ MemVT.getScalarType() == MVT::f16);
+ EVT IVT = toIntegerVT(VT);
+ EVT MemIVT = toIntegerVT(MemVT);
+ IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT,
+ false, // isVolatile
+ true, // isNonTemporal
+ true, // isInvariant
+ Align); // Alignment
+ return DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load);
+ }
+ ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ return DAG.getLoad(ISD::UNINDEXED, ExtTy,
VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
false, // isVolatile
true, // isNonTemporal
true, // isInvariant
- DL->getABITypeAlignment(Ty)); // Alignment
+ Align); // Alignment
SDValue SITargetLowering::LowerFormalArguments(
@@ -1570,15 +1637,15 @@ static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
switch (Opc) {
+ case ISD::SMAX:
+ case ISD::UMAX:
+ case ISD::SMIN:
+ case ISD::UMIN:
llvm_unreachable("Not a min/max opcode");
@@ -1664,10 +1731,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performSetCCCombine(N, DCI);
case ISD::FMAXNUM: // TODO: What about fmax_legacy?
+ case ISD::SMAX:
+ case ISD::SMIN:
+ case ISD::UMAX:
+ case ISD::UMIN: {
if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
N->getValueType(0) != MVT::f64 &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index a95354c..a956b01 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -63,7 +63,7 @@ public:
EVT /*VT*/) const override;
bool isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const override;
+ Type *Ty, unsigned AS) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
diff --git a/lib/Target/R600/ b/lib/Target/R600/
index 839c2e9..2f39074 100644
--- a/lib/Target/R600/
+++ b/lib/Target/R600/
@@ -224,16 +224,16 @@ defm S_SUBB_U32 : SOP2_32 <sop2<0x05>, "s_subb_u32",
} // End Uses = [SCC]
defm S_MIN_I32 : SOP2_32 <sop2<0x06>, "s_min_i32",
- [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
+ [(set i32:$dst, (smin i32:$src0, i32:$src1))]
defm S_MIN_U32 : SOP2_32 <sop2<0x07>, "s_min_u32",
- [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
+ [(set i32:$dst, (umin i32:$src0, i32:$src1))]
defm S_MAX_I32 : SOP2_32 <sop2<0x08>, "s_max_i32",
- [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
+ [(set i32:$dst, (smax i32:$src0, i32:$src1))]
defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32",
- [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
+ [(set i32:$dst, (umax i32:$src0, i32:$src1))]
} // End Defs = [SCC]
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 388cb65..6b3b51a 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -408,7 +408,7 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
uint64_t ImmValue = IsImm ? MCValOp.getImm() : 0;
const MCExpr *ValExpr;
if (IsImm)
- ValExpr = MCConstantExpr::Create(ImmValue, getContext());
+ ValExpr = MCConstantExpr::create(ImmValue, getContext());
ValExpr = MCValOp.getExpr();
@@ -417,7 +417,7 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
if (!IsImm || (ImmValue & ~0x1fff)) {
MCInst TmpInst;
const MCExpr *Expr =
- SparcMCExpr::Create(SparcMCExpr::VK_Sparc_HI, ValExpr, getContext());
+ SparcMCExpr::create(SparcMCExpr::VK_Sparc_HI, ValExpr, getContext());
@@ -429,7 +429,7 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
if (!IsImm || ((ImmValue & 0x1fff) != 0 || ImmValue == 0)) {
MCInst TmpInst;
const MCExpr *Expr =
- SparcMCExpr::Create(SparcMCExpr::VK_Sparc_LO, ValExpr, getContext());
+ SparcMCExpr::create(SparcMCExpr::VK_Sparc_LO, ValExpr, getContext());
@@ -774,11 +774,11 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
- const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+ const MCExpr *Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
if (isCall &&
getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_)
- Res = SparcMCExpr::Create(SparcMCExpr::VK_Sparc_WPLT30, Res,
+ Res = SparcMCExpr::create(SparcMCExpr::VK_Sparc_WPLT30, Res,
Op = SparcOperand::CreateImm(Res, S, E);
@@ -1010,7 +1010,7 @@ bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
- EVal = SparcMCExpr::Create(VK, subExpr, getContext());
+ EVal = SparcMCExpr::create(VK, subExpr, getContext());
return true;
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
index bac2617..5d714fe 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
@@ -120,7 +120,7 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
assert(MO.isExpr() && "Unknown operand kind in printOperand");
- MO.getExpr()->print(O);
+ MO.getExpr()->print(O, &MAI);
void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum,
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 3792a59..9388527 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -260,7 +260,7 @@ namespace {
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
- OW->Write32(0x01000000);
+ OW->write32(0x01000000);
return true;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 124cb3b..280c6d7 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -20,8 +20,7 @@ using namespace llvm;
void SparcELFMCAsmInfo::anchor() {}
-SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) {
- Triple TheTriple(TT);
+SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Triple &TheTriple) {
bool isV9 = (TheTriple.getArch() == Triple::sparcv9);
IsLittleEndian = (TheTriple.getArch() == Triple::sparcel);
@@ -51,8 +50,8 @@ SparcELFMCAsmInfo::getExprForPersonalitySymbol(const MCSymbol *Sym,
MCStreamer &Streamer) const {
if (Encoding & dwarf::DW_EH_PE_pcrel) {
MCContext &Ctx = Streamer.getContext();
- return SparcMCExpr::Create(SparcMCExpr::VK_Sparc_R_DISP32,
- MCSymbolRefExpr::Create(Sym, Ctx), Ctx);
+ return SparcMCExpr::create(SparcMCExpr::VK_Sparc_R_DISP32,
+ MCSymbolRefExpr::create(Sym, Ctx), Ctx);
return MCAsmInfo::getExprForPersonalitySymbol(Sym, Encoding, Streamer);
@@ -64,8 +63,8 @@ SparcELFMCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym,
MCStreamer &Streamer) const {
if (Encoding & dwarf::DW_EH_PE_pcrel) {
MCContext &Ctx = Streamer.getContext();
- return SparcMCExpr::Create(SparcMCExpr::VK_Sparc_R_DISP32,
- MCSymbolRefExpr::Create(Sym, Ctx), Ctx);
+ return SparcMCExpr::create(SparcMCExpr::VK_Sparc_R_DISP32,
+ MCSymbolRefExpr::create(Sym, Ctx), Ctx);
return MCAsmInfo::getExprForFDESymbol(Sym, Encoding, Streamer);
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index 84de551..12386f1 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -17,12 +17,12 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
-class StringRef;
+class Triple;
class SparcELFMCAsmInfo : public MCAsmInfoELF {
void anchor() override;
- explicit SparcELFMCAsmInfo(StringRef TT);
+ explicit SparcELFMCAsmInfo(const Triple &TheTriple);
const MCExpr*
getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding,
MCStreamer &Streamer) const override;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index 34079ee..9171d4d 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -86,16 +86,10 @@ void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (Ctx.getAsmInfo()->isLittleEndian()) {
// Output the bits in little-endian byte order.
- for (unsigned i = 0; i != 4; ++i) {
- OS << (char)Bits;
- Bits >>= 8;
- }
+ support::endian::Writer<support::little>(OS).write<uint32_t>(Bits);
} else {
// Output the bits in big-endian byte order.
- for (unsigned i = 0; i != 4; ++i) {
- OS << (char)(Bits >> 24);
- Bits <<= 8;
- }
+ support::endian::Writer<support::big>(OS).write<uint32_t>(Bits);
unsigned tlsOpNo = 0;
switch (MI.getOpcode()) {
@@ -137,7 +131,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
int64_t Res;
- if (Expr->EvaluateAsAbsolute(Res))
+ if (Expr->evaluateAsAbsolute(Res))
return Res;
llvm_unreachable("Unhandled expression!");
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index d97e3a2..e85a8cd 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -15,9 +15,8 @@
#include "SparcMCExpr.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Object/ELF.h"
@@ -26,20 +25,17 @@ using namespace llvm;
#define DEBUG_TYPE "sparcmcexpr"
const SparcMCExpr*
-SparcMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+SparcMCExpr::create(VariantKind Kind, const MCExpr *Expr,
MCContext &Ctx) {
return new (Ctx) SparcMCExpr(Kind, Expr);
-void SparcMCExpr::PrintImpl(raw_ostream &OS) const
+void SparcMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
bool closeParen = printVariantKind(OS, Kind);
const MCExpr *Expr = getSubExpr();
- Expr->print(OS);
+ Expr->print(OS, MAI);
if (closeParen)
OS << ')';
@@ -160,10 +156,10 @@ Sparc::Fixups SparcMCExpr::getFixupKind(SparcMCExpr::VariantKind Kind) {
-SparcMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+SparcMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const {
- return getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup);
+ return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup);
static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
@@ -184,8 +180,7 @@ static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
case MCExpr::SymbolRef: {
const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
- MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol());
+ cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS);
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index 116e104..d08ad86 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -69,7 +69,7 @@ public:
/// @name Construction
/// @{
- static const SparcMCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ static const SparcMCExpr *create(VariantKind Kind, const MCExpr *Expr,
MCContext &Ctx);
/// @}
/// @name Accessors
@@ -85,13 +85,13 @@ public:
Sparc::Fixups getFixupKind() const { return getFixupKind(Kind); }
/// @}
- void PrintImpl(raw_ostream &OS) const override;
- bool EvaluateAsRelocatableImpl(MCValue &Res,
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *FindAssociatedSection() const override {
- return getSubExpr()->FindAssociatedSection();
+ MCSection *findAssociatedSection() const override {
+ return getSubExpr()->findAssociatedSection();
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 4d5672e..d34c879 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -34,7 +34,7 @@ using namespace llvm;
#include ""
static MCAsmInfo *createSparcMCAsmInfo(const MCRegisterInfo &MRI,
- StringRef TT) {
+ const Triple &TT) {
MCAsmInfo *MAI = new SparcELFMCAsmInfo(TT);
unsigned Reg = MRI.getDwarfRegNum(SP::O6, true);
MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0);
@@ -43,7 +43,7 @@ static MCAsmInfo *createSparcMCAsmInfo(const MCRegisterInfo &MRI,
static MCAsmInfo *createSparcV9MCAsmInfo(const MCRegisterInfo &MRI,
- StringRef TT) {
+ const Triple &TT) {
MCAsmInfo *MAI = new SparcELFMCAsmInfo(TT);
unsigned Reg = MRI.getDwarfRegNum(SP::O6, true);
MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 2047);
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 9903bc5..c5f046b 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -78,9 +78,9 @@ namespace {
static MCOperand createSparcMCOperand(SparcMCExpr::VariantKind Kind,
MCSymbol *Sym, MCContext &OutContext) {
- const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Sym,
+ const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Sym,
- const SparcMCExpr *expr = SparcMCExpr::Create(Kind, MCSym, OutContext);
+ const SparcMCExpr *expr = SparcMCExpr::create(Kind, MCSym, OutContext);
return MCOperand::createExpr(expr);
@@ -94,15 +94,15 @@ static MCOperand createPCXRelExprOp(SparcMCExpr::VariantKind Kind,
MCSymbol *CurLabel,
MCContext &OutContext)
- const MCSymbolRefExpr *GOT = MCSymbolRefExpr::Create(GOTLabel, OutContext);
- const MCSymbolRefExpr *Start = MCSymbolRefExpr::Create(StartLabel,
+ const MCSymbolRefExpr *GOT = MCSymbolRefExpr::create(GOTLabel, OutContext);
+ const MCSymbolRefExpr *Start = MCSymbolRefExpr::create(StartLabel,
- const MCSymbolRefExpr *Cur = MCSymbolRefExpr::Create(CurLabel,
+ const MCSymbolRefExpr *Cur = MCSymbolRefExpr::create(CurLabel,
- const MCBinaryExpr *Sub = MCBinaryExpr::CreateSub(Cur, Start, OutContext);
- const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(GOT, Sub, OutContext);
- const SparcMCExpr *expr = SparcMCExpr::Create(Kind,
+ const MCBinaryExpr *Sub = MCBinaryExpr::createSub(Cur, Start, OutContext);
+ const MCBinaryExpr *Add = MCBinaryExpr::createAdd(GOT, Sub, OutContext);
+ const SparcMCExpr *expr = SparcMCExpr::create(Kind,
Add, OutContext);
return MCOperand::createExpr(expr);
@@ -199,7 +199,7 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
EmitHiLo(*OutStreamer, GOTLabel,
SparcMCExpr::VK_Sparc_H44, SparcMCExpr::VK_Sparc_M44,
MCRegOP, OutContext, STI);
- MCOperand imm = MCOperand::createExpr(MCConstantExpr::Create(12,
+ MCOperand imm = MCOperand::createExpr(MCConstantExpr::create(12,
EmitSHL(*OutStreamer, MCRegOP, imm, MCRegOP, STI);
MCOperand lo = createSparcMCOperand(SparcMCExpr::VK_Sparc_L44,
@@ -211,7 +211,7 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
EmitHiLo(*OutStreamer, GOTLabel,
SparcMCExpr::VK_Sparc_HH, SparcMCExpr::VK_Sparc_HM,
MCRegOP, OutContext, STI);
- MCOperand imm = MCOperand::createExpr(MCConstantExpr::Create(32,
+ MCOperand imm = MCOperand::createExpr(MCConstantExpr::create(32,
EmitSHL(*OutStreamer, MCRegOP, imm, MCRegOP, STI);
// Use register %o7 to load the lower 32 bits.
@@ -361,10 +361,10 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
O << (int)MO.getImm();
case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
+ MO.getMBB()->getSymbol()->print(O, MAI);
case MachineOperand::MO_GlobalAddress:
- O << *getSymbol(MO.getGlobal());
+ getSymbol(MO.getGlobal())->print(O, MAI);
case MachineOperand::MO_BlockAddress:
O << GetBlockAddressSymbol(MO.getBlockAddress())->getName();
diff --git a/lib/Target/Sparc/SparcMCInstLower.cpp b/lib/Target/Sparc/SparcMCInstLower.cpp
index 9388d59..b084d00 100644
--- a/lib/Target/Sparc/SparcMCInstLower.cpp
+++ b/lib/Target/Sparc/SparcMCInstLower.cpp
@@ -59,9 +59,9 @@ static MCOperand LowerSymbolOperand(const MachineInstr *MI,
- const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol,
+ const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Symbol,
- const SparcMCExpr *expr = SparcMCExpr::Create(Kind, MCSym,
+ const SparcMCExpr *expr = SparcMCExpr::create(Kind, MCSym,
return MCOperand::createExpr(expr);
diff --git a/lib/Target/Sparc/SparcTargetObjectFile.cpp b/lib/Target/Sparc/SparcTargetObjectFile.cpp
index 32b2240..412e124 100644
--- a/lib/Target/Sparc/SparcTargetObjectFile.cpp
+++ b/lib/Target/Sparc/SparcTargetObjectFile.cpp
@@ -34,8 +34,8 @@ const MCExpr *SparcELFTargetObjectFile::getTTypeGlobalReference(
MCContext &Ctx = getContext();
- return SparcMCExpr::Create(SparcMCExpr::VK_Sparc_R_DISP32,
- MCSymbolRefExpr::Create(SSym, Ctx), Ctx);
+ return SparcMCExpr::create(SparcMCExpr::VK_Sparc_R_DISP32,
+ MCSymbolRefExpr::create(SSym, Ctx), Ctx);
return TargetLoweringObjectFileELF::getTTypeGlobalReference(
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index b721def..3aa4c6b 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -865,9 +865,9 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
MCSymbol *Sym = Ctx.createTempSymbol();
- const MCExpr *Base = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+ const MCExpr *Base = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
- Expr = Value == 0 ? Base : MCBinaryExpr::CreateAdd(Base, Expr, Ctx);
+ Expr = Value == 0 ? Base : MCBinaryExpr::createAdd(Base, Expr, Ctx);
// Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol.
@@ -904,7 +904,7 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
StringRef Identifier = Parser.getTok().getString();
- Sym = MCSymbolRefExpr::Create(Ctx.getOrCreateSymbol(Identifier),
+ Sym = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(Identifier),
Kind, Ctx);
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index 373ddfa..059ae3f 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -37,13 +37,14 @@ void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp,
-void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) {
+void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
+ raw_ostream &O) {
if (MO.isReg())
O << '%' << getRegisterName(MO.getReg());
else if (MO.isImm())
O << MO.getImm();
else if (MO.isExpr())
- O << *MO.getExpr();
+ MO.getExpr()->print(O, MAI);
llvm_unreachable("Invalid operand");
@@ -147,7 +148,7 @@ void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
O << "0x";
} else
- O << *MO.getExpr();
+ MO.getExpr()->print(O, &MAI);
void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum,
@@ -175,7 +176,7 @@ void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum,
void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- printOperand(MI->getOperand(OpNum), O);
+ printOperand(MI->getOperand(OpNum), &MAI, O);
void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum,
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index 847b696..ba55e68 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -35,7 +35,8 @@ public:
raw_ostream &O);
// Print the given operand.
- static void printOperand(const MCOperand &MO, raw_ostream &O);
+ static void printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
+ raw_ostream &O);
// Override MCInstPrinter.
void printRegName(raw_ostream &O, unsigned RegNo) const override;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 1c3887a..0e8a680 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -105,7 +105,7 @@ void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
MCObjectWriter *OW) const {
for (uint64_t I = 0; I != Count; ++I)
- OW->Write8(7);
+ OW->write8(7);
return true;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index 0161d62..b17977d 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -13,7 +13,7 @@
using namespace llvm;
-SystemZMCAsmInfo::SystemZMCAsmInfo(StringRef TT) {
+SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) {
PointerSize = 8;
CalleeSaveStackSlotSize = 8;
IsLittleEndian = false;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
index 19b5b4b..800f892 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -14,11 +14,11 @@
#include "llvm/Support/Compiler.h"
namespace llvm {
-class StringRef;
+class Triple;
class SystemZMCAsmInfo : public MCAsmInfoELF {
- explicit SystemZMCAsmInfo(StringRef TT);
+ explicit SystemZMCAsmInfo(const Triple &TT);
} // end namespace llvm
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index c9290c1..fd52a2e 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -217,7 +217,7 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
const MCOperand &MO = MI.getOperand(OpNum);
const MCExpr *Expr;
if (MO.isImm())
- Expr = MCConstantExpr::Create(MO.getImm() + Offset, Ctx);
+ Expr = MCConstantExpr::create(MO.getImm() + Offset, Ctx);
else {
Expr = MO.getExpr();
if (Offset) {
@@ -225,8 +225,8 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
// is relative to the operand field itself, which is Offset bytes
// into MI. Add Offset to the relocation value to cancel out
// this difference.
- const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx);
- Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
+ const MCExpr *OffsetExpr = MCConstantExpr::create(Offset, Ctx);
+ Expr = MCBinaryExpr::createAdd(Expr, OffsetExpr, Ctx);
Fixups.push_back(MCFixup::create(Offset, Expr, (MCFixupKind)Kind));
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 8c2075a..92681cf 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -132,7 +132,7 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) {
static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI,
- StringRef TT) {
+ const Triple &TT) {
MCAsmInfo *MAI = new SystemZMCAsmInfo(TT);
MCCFIInstruction Inst =
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index a0d079f..3dca7bd 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -68,14 +68,14 @@ static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) {
static const MCSymbolRefExpr *getTLSGetOffset(MCContext &Context) {
StringRef Name = "__tls_get_offset";
- return MCSymbolRefExpr::Create(Context.getOrCreateSymbol(Name),
+ return MCSymbolRefExpr::create(Context.getOrCreateSymbol(Name),
static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) {
StringRef Name = "_GLOBAL_OFFSET_TABLE_";
- return MCSymbolRefExpr::Create(Context.getOrCreateSymbol(Name),
+ return MCSymbolRefExpr::create(Context.getOrCreateSymbol(Name),
@@ -285,7 +285,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
auto *ZCPV = static_cast<SystemZConstantPoolValue*>(MCPV);
const MCExpr *Expr =
- MCSymbolRefExpr::Create(getSymbol(ZCPV->getGlobalValue()),
+ MCSymbolRefExpr::create(getSymbol(ZCPV->getGlobalValue()),
uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType());
@@ -305,7 +305,7 @@ bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
} else {
SystemZMCInstLower Lower(MF->getContext(), *this);
MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo)));
- SystemZInstPrinter::printOperand(MO, OS);
+ SystemZInstPrinter::printOperand(MO, MAI, OS);
return false;
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 24b5a41..91e12c2 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -506,9 +506,10 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
*Fast = true;
return true;
bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
// Punt on globals for now, although they can be used in limited
if (AM.BaseGV)
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index b001abc..2f7617b 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -369,7 +369,8 @@ public:
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
bool *Fast) const override;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 9059885..4346850 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -752,10 +752,9 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
-MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- int FrameIndex) const {
+MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
unsigned Opcode = MI->getOpcode();
@@ -765,9 +764,11 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
isInt<8>(MI->getOperand(2).getImm()) &&
!MI->getOperand(3).getReg()) {
// LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST
- return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::AGSI))
- .addFrameIndex(FrameIndex).addImm(0)
- .addImm(MI->getOperand(2).getImm());
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
+ get(SystemZ::AGSI))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addImm(MI->getOperand(2).getImm());
return nullptr;
@@ -786,9 +787,11 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
isInt<8>(MI->getOperand(2).getImm())) {
// A(G)HI %reg, CONST -> A(G)SI %mem, CONST
Opcode = (Opcode == SystemZ::AHI ? SystemZ::ASI : SystemZ::AGSI);
- return BuildMI(MF, MI->getDebugLoc(), get(Opcode))
- .addFrameIndex(FrameIndex).addImm(0)
- .addImm(MI->getOperand(2).getImm());
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
+ get(Opcode))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addImm(MI->getOperand(2).getImm());
if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) {
@@ -798,17 +801,23 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// source register instead.
if (OpNum == 0) {
unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD;
- return BuildMI(MF, MI->getDebugLoc(), get(StoreOpcode))
- .addOperand(MI->getOperand(1)).addFrameIndex(FrameIndex)
- .addImm(0).addReg(0);
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
+ get(StoreOpcode))
+ .addOperand(MI->getOperand(1))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addReg(0);
// If we're spilling the source of an LDGR or LGDR, load the
// destination register instead.
if (OpNum == 1) {
unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD;
unsigned Dest = MI->getOperand(0).getReg();
- return BuildMI(MF, MI->getDebugLoc(), get(LoadOpcode), Dest)
- .addFrameIndex(FrameIndex).addImm(0).addReg(0);
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
+ get(LoadOpcode), Dest)
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addReg(0);
@@ -830,17 +839,25 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
if (MMO->getSize() == Size && !MMO->isVolatile()) {
// Handle conversion of loads.
if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) {
- return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC))
- .addFrameIndex(FrameIndex).addImm(0).addImm(Size)
- .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm())
- .addMemOperand(MMO);
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
+ get(SystemZ::MVC))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addImm(Size)
+ .addOperand(MI->getOperand(1))
+ .addImm(MI->getOperand(2).getImm())
+ .addMemOperand(MMO);
// Handle conversion of stores.
if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) {
- return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC))
- .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm())
- .addImm(Size).addFrameIndex(FrameIndex).addImm(0)
- .addMemOperand(MMO);
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(),
+ get(SystemZ::MVC))
+ .addOperand(MI->getOperand(1))
+ .addImm(MI->getOperand(2).getImm())
+ .addImm(Size)
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addMemOperand(MMO);
@@ -856,7 +873,8 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
assert(AccessBytes != 0 && "Size of access should be known");
assert(AccessBytes <= Size && "Access outside the frame index");
uint64_t Offset = Size - AccessBytes;
- MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(MemOpcode));
+ MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
+ MI->getDebugLoc(), get(MemOpcode));
for (unsigned I = 0; I < OpNum; ++I)
@@ -869,10 +887,9 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return nullptr;
-MachineInstr *
-SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- MachineInstr *LoadMI) const {
+MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const {
return nullptr;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index b55810b..e47f2ee 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -187,9 +187,11 @@ public:
LiveVariables *LV) const override;
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
int FrameIndex) const override;
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
MachineInstr *LoadMI) const override;
bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp
index a1dceda..2655e48 100644
--- a/lib/Target/SystemZ/SystemZMCInstLower.cpp
+++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -68,11 +68,11 @@ SystemZMCInstLower::getExpr(const MachineOperand &MO,
llvm_unreachable("unknown operand type");
- const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx);
+ const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, Ctx);
if (HasOffset)
if (int64_t Offset = MO.getOffset()) {
- const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx);
- Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
+ const MCExpr *OffsetExpr = MCConstantExpr::create(Offset, Ctx);
+ Expr = MCBinaryExpr::createAdd(Expr, OffsetExpr, Ctx);
return Expr;
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index a184b92..d498bb1 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -313,7 +313,7 @@ const MCExpr *TargetLoweringObjectFile::getTTypeGlobalReference(
const TargetMachine &TM, MachineModuleInfo *MMI,
MCStreamer &Streamer) const {
const MCSymbolRefExpr *Ref =
- MCSymbolRefExpr::Create(TM.getSymbol(GV, Mang), getContext());
+ MCSymbolRefExpr::create(TM.getSymbol(GV, Mang), getContext());
return getTTypeReference(Ref, Encoding, Streamer);
@@ -332,8 +332,8 @@ getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
// .-foo addressing.
MCSymbol *PCSym = getContext().createTempSymbol();
- const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
- return MCBinaryExpr::CreateSub(Sym, PC, getContext());
+ const MCExpr *PC = MCSymbolRefExpr::create(PCSym, getContext());
+ return MCBinaryExpr::createSub(Sym, PC, getContext());
@@ -341,7 +341,7 @@ getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
const MCExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
// FIXME: It's not clear what, if any, default this should have - perhaps a
// null return could mean 'no location' & we should just do that here.
- return MCSymbolRefExpr::Create(Sym, *Ctx);
+ return MCSymbolRefExpr::create(Sym, *Ctx);
void TargetLoweringObjectFile::getNameWithPrefix(
diff --git a/lib/Target/TargetRecip.cpp b/lib/Target/TargetRecip.cpp
new file mode 100644
index 0000000..42bc487
--- /dev/null
+++ b/lib/Target/TargetRecip.cpp
@@ -0,0 +1,225 @@
+//===-------------------------- TargetRecip.cpp ---------------------------===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This class is used to customize machine-specific reciprocal estimate code
+// generation in a target-independent way.
+// If a target does not support operations in this specification, then code
+// generation will default to using supported operations.
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRecip.h"
+#include <map>
+using namespace llvm;
+// These are the names of the individual reciprocal operations. These are
+// the key strings for queries and command-line inputs.
+// In addition, the command-line interface recognizes the global parameters
+// "all", "none", and "default".
+static const char *RecipOps[] = {
+ "divd",
+ "divf",
+ "vec-divd",
+ "vec-divf",
+ "sqrtd",
+ "sqrtf",
+ "vec-sqrtd",
+ "vec-sqrtf",
+// The uninitialized state is needed for the enabled settings and refinement
+// steps because custom settings may arrive via the command-line before target
+// defaults are set.
+TargetRecip::TargetRecip() {
+ unsigned NumStrings = llvm::array_lengthof(RecipOps);
+ for (unsigned i = 0; i < NumStrings; ++i)
+ RecipMap.insert(std::make_pair(RecipOps[i], RecipParams()));
+static bool parseRefinementStep(const StringRef &In, size_t &Position,
+ uint8_t &Value) {
+ const char RefStepToken = ':';
+ Position = In.find(RefStepToken);
+ if (Position == StringRef::npos)
+ return false;
+ StringRef RefStepString = In.substr(Position + 1);
+ // Allow exactly one numeric character for the additional refinement
+ // step parameter.
+ if (RefStepString.size() == 1) {
+ char RefStepChar = RefStepString[0];
+ if (RefStepChar >= '0' && RefStepChar <= '9') {
+ Value = RefStepChar - '0';
+ return true;
+ }
+ }
+ report_fatal_error("Invalid refinement step for -recip.");
+bool TargetRecip::parseGlobalParams(const std::string &Arg) {
+ StringRef ArgSub = Arg;
+ // Look for an optional setting of the number of refinement steps needed
+ // for this type of reciprocal operation.
+ size_t RefPos;
+ uint8_t RefSteps;
+ StringRef RefStepString;
+ if (parseRefinementStep(ArgSub, RefPos, RefSteps)) {
+ // Split the string for further processing.
+ RefStepString = ArgSub.substr(RefPos + 1);
+ ArgSub = ArgSub.substr(0, RefPos);
+ }
+ bool Enable;
+ bool UseDefaults;
+ if (ArgSub == "all") {
+ UseDefaults = false;
+ Enable = true;
+ } else if (ArgSub == "none") {
+ UseDefaults = false;
+ Enable = false;
+ } else if (ArgSub == "default") {
+ UseDefaults = true;
+ } else {
+ // Any other string is invalid or an individual setting.
+ return false;
+ }
+ // All enable values will be initialized to target defaults if 'default' was
+ // specified.
+ if (!UseDefaults)
+ for (auto &KV : RecipMap)
+ KV.second.Enabled = Enable;
+ // Custom refinement count was specified with all, none, or default.
+ if (!RefStepString.empty())
+ for (auto &KV : RecipMap)
+ KV.second.RefinementSteps = RefSteps;
+ return true;
+void TargetRecip::parseIndividualParams(const std::vector<std::string> &Args) {
+ static const char DisabledPrefix = '!';
+ unsigned NumArgs = Args.size();
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ StringRef Val = Args[i];
+ bool IsDisabled = Val[0] == DisabledPrefix;
+ // Ignore the disablement token for string matching.
+ if (IsDisabled)
+ Val = Val.substr(1);
+ size_t RefPos;
+ uint8_t RefSteps;
+ StringRef RefStepString;
+ if (parseRefinementStep(Val, RefPos, RefSteps)) {
+ // Split the string for further processing.
+ RefStepString = Val.substr(RefPos + 1);
+ Val = Val.substr(0, RefPos);
+ }
+ RecipIter Iter = RecipMap.find(Val);
+ if (Iter == RecipMap.end()) {
+ // Try again specifying float suffix.
+ Iter = RecipMap.find(Val.str() + 'f');
+ if (Iter == RecipMap.end()) {
+ Iter = RecipMap.find(Val.str() + 'd');
+ assert(Iter == RecipMap.end() && "Float entry missing from map");
+ report_fatal_error("Invalid option for -recip.");
+ }
+ // The option was specified without a float or double suffix.
+ if (RecipMap[Val.str() + 'd'].Enabled != Uninitialized) {
+ // Make sure that the double entry was not already specified.
+ // The float entry will be checked below.
+ report_fatal_error("Duplicate option for -recip.");
+ }
+ }
+ if (Iter->second.Enabled != Uninitialized)
+ report_fatal_error("Duplicate option for -recip.");
+ // Mark the matched option as found. Do not allow duplicate specifiers.
+ Iter->second.Enabled = !IsDisabled;
+ if (!RefStepString.empty())
+ Iter->second.RefinementSteps = RefSteps;
+ // If the precision was not specified, the double entry is also initialized.
+ if (Val.back() != 'f' && Val.back() != 'd') {
+ RecipMap[Val.str() + 'd'].Enabled = !IsDisabled;
+ if (!RefStepString.empty())
+ RecipMap[Val.str() + 'd'].RefinementSteps = RefSteps;
+ }
+ }
+TargetRecip::TargetRecip(const std::vector<std::string> &Args) :
+ TargetRecip() {
+ unsigned NumArgs = Args.size();
+ // Check if "all", "default", or "none" was specified.
+ if (NumArgs == 1 && parseGlobalParams(Args[0]))
+ return;
+ parseIndividualParams(Args);
+bool TargetRecip::isEnabled(const StringRef &Key) const {
+ ConstRecipIter Iter = RecipMap.find(Key);
+ assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
+ assert(Iter->second.Enabled != Uninitialized &&
+ "Enablement setting was not initialized");
+ return Iter->second.Enabled;
+unsigned TargetRecip::getRefinementSteps(const StringRef &Key) const {
+ ConstRecipIter Iter = RecipMap.find(Key);
+ assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
+ assert(Iter->second.RefinementSteps != Uninitialized &&
+ "Refinement step setting was not initialized");
+ return Iter->second.RefinementSteps;
+/// Custom settings (previously initialized values) override target defaults.
+void TargetRecip::setDefaults(const StringRef &Key, bool Enable,
+ unsigned RefSteps) {
+ if (Key == "all") {
+ for (auto &KV : RecipMap) {
+ RecipParams &RP = KV.second;
+ if (RP.Enabled == Uninitialized)
+ RP.Enabled = Enable;
+ if (RP.RefinementSteps == Uninitialized)
+ RP.RefinementSteps = RefSteps;
+ }
+ } else {
+ RecipParams &RP = RecipMap[Key];
+ if (RP.Enabled == Uninitialized)
+ RP.Enabled = Enable;
+ if (RP.RefinementSteps == Uninitialized)
+ RP.RefinementSteps = RefSteps;
+ }
+bool TargetRecip::operator==(const TargetRecip &Other) const {
+ for (const auto &KV : RecipMap) {
+ const StringRef &Op = KV.first;
+ const RecipParams &RP = KV.second;
+ const RecipParams &OtherRP = Other.RecipMap.find(Op)->second;
+ if (RP.RefinementSteps != OtherRP.RefinementSteps)
+ return false;
+ if (RP.Enabled != OtherRP.Enabled)
+ return false;
+ }
+ return true;
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index a21f8c7..9eee4a0 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -315,7 +315,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg,
// Test (%SrcReg)
- const MCExpr *Disp = MCConstantExpr::Create(0, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(0, Ctx);
std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
getPointerWidth(), 0, Disp, SrcReg, 0, AccessSize, SMLoc(), SMLoc()));
InstrumentMemOperand(*Op, AccessSize, false /* IsWrite */, RegCtx, Ctx,
@@ -324,7 +324,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg,
// Test -1(%SrcReg, %CntReg, AccessSize)
- const MCExpr *Disp = MCConstantExpr::Create(-1, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(-1, Ctx);
std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
getPointerWidth(), 0, Disp, SrcReg, CntReg, AccessSize, SMLoc(),
@@ -334,7 +334,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg,
// Test (%DstReg)
- const MCExpr *Disp = MCConstantExpr::Create(0, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(0, Ctx);
std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
getPointerWidth(), 0, Disp, DstReg, 0, AccessSize, SMLoc(), SMLoc()));
InstrumentMemOperand(*Op, AccessSize, true /* IsWrite */, RegCtx, Ctx, Out);
@@ -342,7 +342,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg,
// Test -1(%DstReg, %CntReg, AccessSize)
- const MCExpr *Disp = MCConstantExpr::Create(-1, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(-1, Ctx);
std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
getPointerWidth(), 0, Disp, DstReg, CntReg, AccessSize, SMLoc(),
@@ -461,7 +461,7 @@ void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
while (Residue != 0) {
const MCConstantExpr *Disp =
- MCConstantExpr::Create(ApplyDisplacementBounds(Residue), Ctx);
+ MCConstantExpr::create(ApplyDisplacementBounds(Residue), Ctx);
std::unique_ptr<X86Operand> DispOp =
X86Operand::CreateMem(getPointerWidth(), 0, Disp, Reg, 0, 1, SMLoc(),
@@ -493,7 +493,7 @@ X86AddressSanitizer::AddDisplacement(X86Operand &Op, int64_t Displacement,
*Residue = Displacement - NewDisplacement;
- const MCExpr *Disp = MCConstantExpr::Create(NewDisplacement, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(NewDisplacement, Ctx);
return X86Operand::CreateMem(Op.getMemModeSize(), Op.getMemSegReg(), Disp,
Op.getMemBaseReg(), Op.getMemIndexReg(),
Op.getMemScale(), SMLoc(), SMLoc());
@@ -615,7 +615,7 @@ private:
const std::string &Fn = FuncName(AccessSize, IsWrite);
MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn));
const MCSymbolRefExpr *FnExpr =
- MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
+ MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr));
@@ -643,7 +643,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
MCInst Inst;
- const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1,
SMLoc(), SMLoc()));
@@ -654,7 +654,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8));
MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg(
@@ -669,7 +669,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
case 1:
case 2: {
- const MCExpr *Disp = MCConstantExpr::Create(1, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(1, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
SMLoc(), SMLoc()));
@@ -720,7 +720,7 @@ void X86AddressSanitizer32::InstrumentMemOperandLarge(
- const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1,
SMLoc(), SMLoc()));
@@ -729,7 +729,7 @@ void X86AddressSanitizer32::InstrumentMemOperandLarge(
EmitInstruction(Out, Inst);
MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
@@ -743,7 +743,7 @@ void X86AddressSanitizer32::InstrumentMOVSImpl(unsigned AccessSize,
// No need to test when ECX is equals to zero.
MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
Out, MCInstBuilder(X86::TEST32rr).addReg(X86::ECX).addReg(X86::ECX));
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
@@ -860,7 +860,7 @@ public:
void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) {
- const MCExpr *Disp = MCConstantExpr::Create(Offset, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(Offset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, X86::RSP, 0, 1,
SMLoc(), SMLoc()));
@@ -885,7 +885,7 @@ private:
const std::string &Fn = FuncName(AccessSize, IsWrite);
MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn));
const MCSymbolRefExpr *FnExpr =
- MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
+ MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr));
@@ -914,7 +914,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall(
MCInst Inst;
- const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1,
SMLoc(), SMLoc()));
@@ -925,7 +925,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall(
Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8));
MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg(
@@ -940,7 +940,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall(
case 1:
case 2: {
- const MCExpr *Disp = MCConstantExpr::Create(1, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(1, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
SMLoc(), SMLoc()));
@@ -991,7 +991,7 @@ void X86AddressSanitizer64::InstrumentMemOperandLarge(
- const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
+ const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1,
SMLoc(), SMLoc()));
@@ -1001,7 +1001,7 @@ void X86AddressSanitizer64::InstrumentMemOperandLarge(
MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
@@ -1015,7 +1015,7 @@ void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize,
// No need to test when RCX is equals to zero.
MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
Out, MCInstBuilder(X86::TEST64rr).addReg(X86::RCX).addReg(X86::RCX));
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 3047fd1..e896571 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -973,7 +973,7 @@ void X86AsmParser::SetFrameRegister(unsigned RegNo) {
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
unsigned basereg =
is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
- const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
+ const MCExpr *Disp = MCConstantExpr::create(0, getContext());
return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
/*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
Loc, Loc, 0);
@@ -982,7 +982,7 @@ std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
unsigned basereg =
is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
- const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
+ const MCExpr *Disp = MCConstantExpr::create(0, getContext());
return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
/*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
Loc, Loc, 0);
@@ -1195,7 +1195,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
const MCExpr *Val =
- MCSymbolRefExpr::Create(Sym, Variant, getContext());
+ MCSymbolRefExpr::create(Sym, Variant, getContext());
if (IDVal == "b" && Sym->isUndefined())
return Error(Loc, "invalid reference to undefined symbol");
StringRef Identifier = Sym->getName();
@@ -1265,9 +1265,9 @@ X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
if (SM.getImm() || !Disp) {
- const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
+ const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
if (Disp)
- Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
+ Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
Disp = Imm; // An immediate displacement only.
@@ -1354,7 +1354,7 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
// Create the symbol reference.
MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
+ Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
return false;
@@ -1382,7 +1382,7 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
// An immediate following a 'segment register', 'colon' token sequence can
// be followed by a bracketed expression. If it isn't we know we have our
// final segment override.
- const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
+ const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
/*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
Start, ImmDispToken.getEndLoc(), Size);
@@ -1435,7 +1435,7 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
return ErrorOperand(Tok.getLoc(), "Expected } at this point");
Parser.Lex(); // Eat "}"
const MCExpr *RndModeOp =
- MCConstantExpr::Create(rndMode, Parser.getContext());
+ MCConstantExpr::create(rndMode, Parser.getContext());
return X86Operand::CreateImm(RndModeOp, Start, End);
@@ -1499,7 +1499,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
return nullptr;
- const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
+ const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext());
// BaseReg is non-zero to avoid assertions. In the context of inline asm,
// we're pointing to a local variable in memory, so the base register is
// really the frame or stack pointer.
@@ -1549,7 +1549,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
- NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
+ NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
return false;
@@ -1623,7 +1623,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
unsigned Len = End.getPointer() - TypeLoc.getPointer();
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
- const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
+ const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
return X86Operand::CreateImm(Imm, Start, End);
@@ -1683,7 +1683,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
- const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
+ const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
return X86Operand::CreateImm(ImmExpr, Start, End);
@@ -1841,7 +1841,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
// only way to do this without lookahead is to eat the '(' and see what is
// after it.
- const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
if (getLexer().isNot(AsmToken::LParen)) {
SMLoc ExprEnd;
if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
@@ -2061,7 +2061,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
- const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
+ const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
@@ -2088,7 +2088,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
- const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
+ const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
@@ -2115,7 +2115,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (ComparisonCode != ~0U) {
Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
- const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
+ const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
@@ -2375,7 +2375,7 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
assert(Op.isImm() && "expected immediate");
int64_t Res;
- if (!Op.getImm()->EvaluateAsAbsolute(Res) || Res > 255) {
+ if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) {
Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
return false;
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 3469d19..6e99c37 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -546,6 +546,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
case TYPE_XMM512:
mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
+ case TYPE_BNDR:
+ mcInst.addOperand(MCOperand::createReg(X86::BND0 + (immediate >> 4)));
case TYPE_REL8:
isBranch = true;
pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
@@ -827,6 +829,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_VK16:
+ case TYPE_BNDR:
return translateRMRegister(mcInst, insn);
case TYPE_M:
case TYPE_M8:
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 9e65050..301db72 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -460,6 +460,7 @@ enum OperandEncoding {
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
+ ENUM_ENTRY(TYPE_BNDR, "MPX bounds register") \
ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index af4399a..ea727e6 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -150,11 +150,11 @@ void X86ATTInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
// that address in hex.
const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
int64_t Address;
- if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) {
O << formatHex((uint64_t)Address);
} else {
// Otherwise, just print the expression.
- O << *Op.getExpr();
+ Op.getExpr()->print(O, &MAI);
@@ -178,7 +178,9 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << markup("<imm:") << '$' << *Op.getExpr() << markup(">");
+ O << markup("<imm:") << '$';
+ Op.getExpr()->print(O, &MAI);
+ O << markup(">");
@@ -203,7 +205,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
O << formatImm(DispVal);
} else {
assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
- O << *DispSpec.getExpr();
+ DispSpec.getExpr()->print(O, &MAI);
if (IndexReg.getReg() || BaseReg.getReg()) {
@@ -273,7 +275,7 @@ void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
O << formatImm(DispSpec.getImm());
} else {
assert(DispSpec.isExpr() && "non-immediate displacement?");
- O << *DispSpec.getExpr();
+ DispSpec.getExpr()->print(O, &MAI);
O << markup(">");
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 4d92daf..879378f 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -131,12 +131,12 @@ void X86IntelInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
// that address in hex.
const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
int64_t Address;
- if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) {
O << formatHex((uint64_t)Address);
else {
// Otherwise, just print the expression.
- O << *Op.getExpr();
+ Op.getExpr()->print(O, &MAI);
@@ -150,7 +150,7 @@ void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << formatImm((int64_t)Op.getImm());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << *Op.getExpr();
+ Op.getExpr()->print(O, &MAI);
@@ -187,7 +187,7 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
if (!DispSpec.isImm()) {
if (NeedPlus) O << " + ";
assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
- O << *DispSpec.getExpr();
+ DispSpec.getExpr()->print(O, &MAI);
} else {
int64_t DispVal = DispSpec.getImm();
if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
@@ -245,7 +245,7 @@ void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
O << formatImm(DispSpec.getImm());
} else {
assert(DispSpec.isExpr() && "non-immediate displacement?");
- O << *DispSpec.getExpr();
+ DispSpec.getExpr()->print(O, &MAI);
O << ']';
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 6d4284d..1ac656d 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -326,7 +326,7 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
// FIXME: We could generated something better than plain 0x90.
if (!HasNopl) {
for (uint64_t i = 0; i < Count; ++i)
- OW->Write8(0x90);
+ OW->write8(0x90);
return true;
@@ -336,10 +336,10 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
for (uint8_t i = 0; i < Prefixes; i++)
- OW->Write8(0x66);
+ OW->write8(0x66);
const uint8_t Rest = ThisNopLength - Prefixes;
for (uint8_t i = 0; i < Rest; i++)
- OW->Write8(Nops[Rest - 1][i]);
+ OW->write8(Nops[Rest - 1][i]);
Count -= ThisNopLength;
} while (Count != 0);
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 4508883..a33468d 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -66,6 +66,7 @@ static X86_64RelType getType64(unsigned Kind,
case X86::reloc_riprel_4byte:
case X86::reloc_riprel_4byte_movq_load:
return RT64_32;
+ case FK_PCRel_2:
case FK_Data_2:
return RT64_16;
case FK_PCRel_1:
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index a39def9..2943dd3 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -31,13 +31,13 @@ public:
StringRef SymName; SymI->getName(SymName);
uint64_t SymAddr; SymI->getAddress(SymAddr);
- uint64_t SymSize; SymI->getSize(SymSize);
+ uint64_t SymSize = SymI->getSize();
int64_t Addend; getELFRelocationAddend(Rel, Addend);
MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
if (!Sym->isVariable())
- Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx));
+ Sym->setVariableValue(MCConstantExpr::create(SymAddr, Ctx));
const MCExpr *Expr = nullptr;
// If hasAddend is true, then we need to add Addend (r_addend) to Expr.
@@ -76,7 +76,7 @@ public:
case R_X86_64_PC64:
// S + A - P (P/pcrel is implicit)
hasAddend = true;
- Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, Ctx);
case R_X86_64_GOT32:
case R_X86_64_GOT64:
@@ -85,27 +85,27 @@ public:
case R_X86_64_GOTPLT64:
// G + A
hasAddend = true;
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, Ctx);
case R_X86_64_PLT32:
// L + A - P -> S@PLT + A
hasAddend = true;
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_PLT, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_PLT, Ctx);
case R_X86_64_GLOB_DAT:
case R_X86_64_JUMP_SLOT:
// S
- Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, Ctx);
case R_X86_64_GOTPCREL:
case R_X86_64_GOTPCREL64:
// G + GOT + A - P -> S@GOTPCREL + A
hasAddend = true;
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
case R_X86_64_GOTOFF64:
// S + A - GOT
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTOFF, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTOFF, Ctx);
case R_X86_64_PLTOFF64:
// L + A - GOT
@@ -113,15 +113,15 @@ public:
case R_X86_64_SIZE32:
case R_X86_64_SIZE64:
// Z + A
- Expr = MCConstantExpr::Create(SymSize, Ctx);
+ Expr = MCConstantExpr::create(SymSize, Ctx);
- Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, Ctx);
if (Expr && hasAddend && Addend != 0)
- Expr = MCBinaryExpr::CreateAdd(Expr,
- MCConstantExpr::Create(Addend, Ctx),
+ Expr = MCBinaryExpr::createAdd(Expr,
+ MCConstantExpr::create(Addend, Ctx),
return Expr;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index bda35f2..fc0b0f8 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -119,9 +119,9 @@ X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
MCStreamer &Streamer) const {
MCContext &Context = Streamer.getContext();
const MCExpr *Res =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
- const MCExpr *Four = MCConstantExpr::Create(4, Context);
- return MCBinaryExpr::CreateAdd(Res, Four, Context);
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
+ const MCExpr *Four = MCConstantExpr::create(4, Context);
+ return MCBinaryExpr::createAdd(Res, Four, Context);
void X86MCAsmInfoMicrosoft::anchor() { }
@@ -132,6 +132,11 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
PrivateLabelPrefix = ".L";
PointerSize = 8;
WinEHEncodingType = WinEH::EncodingType::Itanium;
+ } else {
+ // 32-bit X86 doesn't use CFI, so this isn't a real encoding type. It's just
+ // a place holder that the Windows EHStreamer looks for to suppress CFI
+ // output. In particular, usesWindowsCFI() returns false.
+ WinEHEncodingType = WinEH::EncodingType::X86;
ExceptionsType = ExceptionHandling::WinEH;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 8aed7a4..10c434c 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -304,7 +304,7 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
- Expr = MCConstantExpr::Create(DispOp.getImm(), Ctx);
+ Expr = MCConstantExpr::create(DispOp.getImm(), Ctx);
} else {
Expr = DispOp.getExpr();
@@ -351,7 +351,7 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
ImmOffset -= 1;
if (ImmOffset)
- Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx),
+ Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(ImmOffset, Ctx),
// Emit a symbolic constant as a fixup and 4 zeros.
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 8e3c721..cc98e55 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -115,8 +115,8 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) {
return X;
-static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
- Triple TheTriple(TT);
+static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI,
+ const Triple &TheTriple) {
bool is64Bit = TheTriple.getArch() == Triple::x86_64;
MCAsmInfo *MAI;
diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
index 6cf5af7..a5aadd6 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
@@ -39,33 +39,33 @@ public:
MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
if (!Sym->isVariable())
- Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx));
+ Sym->setVariableValue(MCConstantExpr::create(SymAddr, Ctx));
const MCExpr *Expr = nullptr;
switch(RelType) {
case X86_64_RELOC_TLV:
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
case X86_64_RELOC_SIGNED_4:
- Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx),
- MCConstantExpr::Create(4, Ctx),
+ Expr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Sym, Ctx),
+ MCConstantExpr::create(4, Ctx),
case X86_64_RELOC_SIGNED_2:
- Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx),
- MCConstantExpr::Create(2, Ctx),
+ Expr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Sym, Ctx),
+ MCConstantExpr::create(2, Ctx),
case X86_64_RELOC_SIGNED_1:
- Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx),
- MCConstantExpr::Create(1, Ctx),
+ Expr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Sym, Ctx),
+ MCConstantExpr::create(1, Ctx),
case X86_64_RELOC_GOT_LOAD:
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
case X86_64_RELOC_GOT:
- Expr = MCSymbolRefExpr::Create(Sym, isPCRel ?
+ Expr = MCSymbolRefExpr::create(Sym, isPCRel ?
MCSymbolRefExpr::VK_GOTPCREL :
@@ -84,7 +84,7 @@ public:
report_fatal_error("Expected X86_64_RELOC_UNSIGNED after "
- const MCExpr *LHS = MCSymbolRefExpr::Create(Sym, Ctx);
+ const MCExpr *LHS = MCSymbolRefExpr::create(Sym, Ctx);
symbol_iterator RSymI = Rel.getSymbol();
uint64_t RSymAddr;
@@ -94,15 +94,15 @@ public:
MCSymbol *RSym = Ctx.getOrCreateSymbol(RSymName);
if (!RSym->isVariable())
- RSym->setVariableValue(MCConstantExpr::Create(RSymAddr, Ctx));
+ RSym->setVariableValue(MCConstantExpr::create(RSymAddr, Ctx));
- const MCExpr *RHS = MCSymbolRefExpr::Create(RSym, Ctx);
+ const MCExpr *RHS = MCSymbolRefExpr::create(RSym, Ctx);
- Expr = MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ Expr = MCBinaryExpr::createSub(LHS, RHS, Ctx);
- Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, Ctx);
return Expr;
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 9da3e1f..95acc07 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
namespace {
class X86MachObjectWriter : public MCMachObjectTargetWriter {
- bool RecordScatteredRelocation(MachObjectWriter *Writer,
+ bool recordScatteredRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -33,7 +33,7 @@ class X86MachObjectWriter : public MCMachObjectTargetWriter {
MCValue Target,
unsigned Log2Size,
uint64_t &FixedValue);
- void RecordTLVPRelocation(MachObjectWriter *Writer,
+ void recordTLVPRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -54,12 +54,10 @@ class X86MachObjectWriter : public MCMachObjectTargetWriter {
MCValue Target, uint64_t &FixedValue);
- X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
- uint32_t CPUSubtype)
- : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
- /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+ X86MachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
- void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+ void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override {
@@ -142,13 +140,11 @@ void X86MachObjectWriter::RecordX86_64Relocation(
const MCSymbol *A = &Target.getSymA()->getSymbol();
if (A->isTemporary())
A = &Writer->findAliasedSymbol(*A);
- const MCSymbolData &A_SD = Asm.getSymbolData(*A);
const MCSymbol *A_Base = Asm.getAtom(*A);
const MCSymbol *B = &Target.getSymB()->getSymbol();
if (B->isTemporary())
B = &Writer->findAliasedSymbol(*B);
- const MCSymbolData &B_SD = Asm.getSymbolData(*B);
const MCSymbol *B_Base = Asm.getAtom(*B);
// Neither symbol can be modified.
@@ -190,7 +186,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
(!B_Base ? 0 : Writer->getSymbolAddress(*B_Base, Layout));
if (!A_Base)
- Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
+ Index = A->getFragment()->getParent()->getOrdinal() + 1;
Type = MachO::X86_64_RELOC_UNSIGNED;
MachO::any_relocation_info MRE;
@@ -202,7 +198,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
if (B_Base)
RelSymbol = B_Base;
- Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
+ Index = B->getFragment()->getParent()->getOrdinal() + 1;
Type = MachO::X86_64_RELOC_SUBTRACTOR;
} else {
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
@@ -211,7 +207,6 @@ void X86MachObjectWriter::RecordX86_64Relocation(
if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
- const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
RelSymbol = Asm.getAtom(*Symbol);
// Relocations inside debug sections always use local relocations when
@@ -235,7 +230,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
} else if (Symbol->isInSection() && !Symbol->isVariable()) {
// The index is the section ordinal (1-based).
- Index = SD.getFragment()->getParent()->getOrdinal() + 1;
+ Index = Symbol->getFragment()->getParent()->getOrdinal() + 1;
Value += Writer->getSymbolAddress(*Symbol, Layout);
if (IsPCRel)
@@ -243,7 +238,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
} else if (Symbol->isVariable()) {
const MCExpr *Value = Symbol->getVariableValue();
int64_t Res;
- bool isAbs = Value->EvaluateAsAbsolute(Res, Layout,
+ bool isAbs = Value->evaluateAsAbsolute(Res, Layout,
if (isAbs) {
FixedValue = Res;
@@ -339,7 +334,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
-bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
+bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -354,23 +349,21 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
- if (!A_SD->getFragment())
+ if (!A->getFragment())
report_fatal_error("symbol '" + A->getName() +
"' can not be undefined in a subtraction expression",
uint32_t Value = Writer->getSymbolAddress(*A, Layout);
- uint64_t SecAddr =
- Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
FixedValue += SecAddr;
uint32_t Value2 = 0;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
- const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+ const MCSymbol *SB = &B->getSymbol();
- if (!B_SD->getFragment())
+ if (!SB->getFragment())
report_fatal_error("symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression",
@@ -380,10 +373,10 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
// Note that there is no longer any semantic difference between these two
// relocation types from the linkers point of view, this is done solely for
// pedantic compatibility with 'as'.
- Type = A_SD->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF :
+ Type = A->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF
Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
- FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
// Relocations are written out in reverse order, so the PAIR comes first.
@@ -435,7 +428,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
return true;
-void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
+void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -490,7 +483,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// If this is a 32-bit TLVP reloc it's handled a bit differently.
if (Target.getSymA() &&
Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
- RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ recordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
@@ -499,7 +492,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// scattered relocation entry. Differences always require scattered
// relocations.
if (Target.getSymB()) {
- RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ recordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
Target, Log2Size, FixedValue);
@@ -515,10 +508,10 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
if (IsPCRel)
Offset += 1 << Log2Size;
// Try to record the scattered relocation if needed. Fall back to non
- // scattered if necessary (see comments in RecordScatteredRelocation()
+ // scattered if necessary (see comments in recordScatteredRelocation()
// for details).
if (Offset && A && !Writer->doesSymbolRequireExternRelocation(*A) &&
- RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ recordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
Log2Size, FixedValue))
@@ -538,7 +531,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// Resolve constant variables.
if (A->isVariable()) {
int64_t Res;
- if (A->getVariableValue()->EvaluateAsAbsolute(
+ if (A->getVariableValue()->evaluateAsAbsolute(
Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
diff --git a/lib/Target/X86/ b/lib/Target/X86/
index c70e2e9..8522674 100644
--- a/lib/Target/X86/
+++ b/lib/Target/X86/
@@ -168,6 +168,8 @@ def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
+def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
+ "Support MPX instructions">;
def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
@@ -188,10 +190,6 @@ def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
-def FeatureUseSqrtEst : SubtargetFeature<"use-sqrt-est", "UseSqrtEst", "true",
- "Use RSQRT* to optimize square root calculations">;
-def FeatureUseRecipEst : SubtargetFeature<"use-recip-est", "UseReciprocalEst",
- "true", "Use RCP* to optimize division calculations">;
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software floating point features.">;
@@ -380,7 +378,7 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec]>;
+ FeatureSlowIncDec, FeatureMPX]>;
def : KnightsLandingProc<"knl">;
// FIXME: define SKX model
@@ -391,7 +389,7 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec]>;
+ FeatureSlowIncDec, FeatureMPX]>;
def : SkylakeProc<"skylake">;
def : SkylakeProc<"skx">; // Legacy alias.
@@ -444,7 +442,7 @@ def : ProcessorModel<"btver2", BtVer2Model,
FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
FeatureBMI, FeatureF16C, FeatureMOVBE,
FeatureLZCNT, FeaturePOPCNT, FeatureFastUAMem,
- FeatureSlowSHLD, FeatureUseSqrtEst, FeatureUseRecipEst]>;
+ FeatureSlowSHLD]>;
// TODO: We should probably add 'FeatureFastUAMem' to all of the AMD chips.
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index f97557e..64fc6d0 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -78,7 +78,7 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
switch (MO.getType()) {
default: llvm_unreachable("unknown symbol type!");
case MachineOperand::MO_ConstantPoolIndex:
- O << *P.GetCPISymbol(MO.getIndex());
+ P.GetCPISymbol(MO.getIndex())->print(O, P.MAI);
P.printOffset(MO.getOffset(), O);
case MachineOperand::MO_GlobalAddress: {
@@ -127,9 +127,12 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
// If the name begins with a dollar-sign, enclose it in parens. We do this
// to avoid having it look like an integer immediate to the assembler.
if (GVSym->getName()[0] != '$')
- O << *GVSym;
- else
- O << '(' << *GVSym << ')';
+ GVSym->print(O, P.MAI);
+ else {
+ O << '(';
+ GVSym->print(O, P.MAI);
+ O << ')';
+ }
P.printOffset(MO.getOffset(), O);
@@ -146,12 +149,15 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
// These affect the name of the symbol, not any suffix.
- O << " + [.-" << *P.MF->getPICBaseSymbol() << ']';
+ O << " + [.-";
+ P.MF->getPICBaseSymbol()->print(O, P.MAI);
+ O << ']';
- O << '-' << *P.MF->getPICBaseSymbol();
+ O << '-';
+ P.MF->getPICBaseSymbol()->print(O, P.MAI);
case X86II::MO_TLSGD: O << "@TLSGD"; break;
case X86II::MO_TLSLD: O << "@TLSLD"; break;
@@ -168,7 +174,8 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
case X86II::MO_PLT: O << "@PLT"; break;
case X86II::MO_TLVP: O << "@TLVP"; break;
- O << "@TLVP" << '-' << *P.MF->getPICBaseSymbol();
+ O << "@TLVP" << '-';
+ P.MF->getPICBaseSymbol()->print(O, P.MAI);
case X86II::MO_SECREL: O << "@SECREL32"; break;
@@ -525,7 +532,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
// register any SEH handlers, so its object files should be safe.
OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
- S, MCConstantExpr::Create(int64_t(1), MMI->getContext()));
+ S, MCConstantExpr::create(int64_t(1), MMI->getContext()));
@@ -549,7 +556,7 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
// using NLPs; however, sometimes the types are local to the file.
// We need to fill in the value for the NLP in those cases.
- MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()),
+ MCSymbolRefExpr::create(MCSym.getPointer(), OutStreamer.getContext()),
4 /*size*/);
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 9af0aeb..3dc75d7 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -3530,9 +3530,9 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
SmallVector<MachineOperand, 8> AddrOps;
- MachineInstr *Result =
- XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps,
- Size, Alignment, /*AllowCommute=*/true);
+ MachineInstr *Result = XII.foldMemoryOperandImpl(
+ *FuncInfo.MF, MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
+ /*AllowCommute=*/true);
if (!Result)
return false;
@@ -3541,20 +3541,21 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
// to just look at OpNo + the offset to the index reg. We actually need to
// scan the instruction to find the index reg and see if its the correct reg
// class.
- for (MIOperands MO(Result); MO.isValid(); ++MO) {
- if (!MO->isReg() || MO->isDef() || MO->getReg() != AM.IndexReg)
+ unsigned OperandNo = 0;
+ for (MachineInstr::mop_iterator I = Result->operands_begin(),
+ E = Result->operands_end(); I != E; ++I, ++OperandNo) {
+ MachineOperand &MO = *I;
+ if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
// Found the index reg, now try to rewrite it.
- unsigned OpNo = MO.getOperandNo();
unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
- MO->getReg(), OpNo);
- if (IndexReg == MO->getReg())
+ MO.getReg(), OperandNo);
+ if (IndexReg == MO.getReg())
- MO->setReg(IndexReg);
+ MO.setReg(IndexReg);
Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
- FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
return true;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 3ba8115..e3ec288 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -67,12 +67,6 @@ static cl::opt<bool> ExperimentalVectorWideningLegalization(
"rather than promotion."),
-static cl::opt<int> ReciprocalEstimateRefinementSteps(
- "x86-recip-refinement-steps", cl::init(1),
- cl::desc("Specify the number of Newton-Raphson iterations applied to the "
- "result of the hardware reciprocal estimate instruction."),
- cl::NotHidden);
// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
SDValue V2);
@@ -842,13 +836,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
- // Only provide customized ctpop vector bit twiddling for vector types we
- // know to perform better than using the popcnt instructions on each vector
- // element. If popcnt isn't supported, always provide the custom version.
- if (!Subtarget->hasPOPCNT()) {
- setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
- setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
- }
+ setOperationAction(ISD::CTPOP, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
@@ -1113,6 +1104,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v32i8, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v16i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f64, Legal);
@@ -1147,16 +1143,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// when we have a 256bit-wide blend with immediate.
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
- // Only provide customized ctpop vector bit twiddling for vector types we
- // know to perform better than using the popcnt instructions on each
- // vector element. If popcnt isn't supported, always provide the custom
- // version.
- if (!Subtarget->hasPOPCNT())
- setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
- // Custom CTPOP always performs better on natively supported v8i32
- setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
// AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
@@ -1273,7 +1259,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::SETCC, MVT::i1, Custom);
setOperationAction(ISD::XOR, MVT::i1, Legal);
@@ -1842,7 +1828,7 @@ X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// entries.
- return MCSymbolRefExpr::Create(MBB->getSymbol(),
+ return MCSymbolRefExpr::create(MBB->getSymbol(),
MCSymbolRefExpr::VK_GOTOFF, Ctx);
@@ -1866,7 +1852,7 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
// Otherwise, the reference is relative to the PIC base.
- return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
+ return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
std::pair<const TargetRegisterClass *, uint8_t>
@@ -1981,7 +1967,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::BCvt)
- ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
+ ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
assert(VA.getLocInfo() != CCValAssign::FPExt &&
"Unexpected FP-extend for return value.");
@@ -2018,13 +2004,13 @@ X86TargetLowering::LowerReturn(SDValue Chain,
if (Subtarget->is64Bit()) {
if (ValVT == MVT::x86mmx) {
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
- ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
+ ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
if (!Subtarget->hasSSE2())
- ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
+ ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
@@ -2451,7 +2437,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
else if (VA.getLocInfo() == CCValAssign::BCvt)
- ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
+ ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
@@ -2780,6 +2766,19 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (MF.getTarget().Options.DisableTailCalls)
isTailCall = false;
+ if (Subtarget->isPICStyleGOT() &&
+ !MF.getTarget().Options.GuaranteedTailCallOpt) {
+ // If we are using a GOT, disable tail calls to external symbols with
+ // default visibility. Tail calling such a symbol requires using a GOT
+ // relocation, which forces early binding of the symbol. This breaks code
+ // that requires lazy function symbol resolution. Using musttail or
+ // GuaranteedTailCallOpt will override this.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (!G || (!G->getGlobal()->hasLocalLinkage() &&
+ G->getGlobal()->hasDefaultVisibility()))
+ isTailCall = false;
+ }
bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
if (IsMustTail) {
// Force this to be a tail call. The verifier rules are enough to ensure
@@ -2898,14 +2897,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
else if (RegVT.is128BitVector()) {
// Special case: passing MMX values in XMM registers.
- Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
+ Arg = DAG.getBitcast(MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
} else
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
case CCValAssign::BCvt:
- Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
+ Arg = DAG.getBitcast(RegVT, Arg);
case CCValAssign::Indirect: {
// Store the argument.
@@ -2964,8 +2963,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Note: The actual moving to ECX is done further down.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if (G && !G->getGlobal()->hasHiddenVisibility() &&
- !G->getGlobal()->hasProtectedVisibility())
+ if (G && !G->getGlobal()->hasLocalLinkage() &&
+ G->getGlobal()->hasDefaultVisibility())
Callee = LowerGlobalAddress(Callee, DAG);
else if (isa<ExternalSymbolSDNode>(Callee))
Callee = LowerExternalSymbol(Callee, DAG);
@@ -4073,7 +4072,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
} else
llvm_unreachable("Unexpected vector type");
- return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+ return DAG.getBitcast(VT, Vec);
static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
@@ -4200,9 +4199,9 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8);
- Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256);
+ Vec256 = DAG.getBitcast(CastVT, Vec256);
Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
- return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256);
+ return DAG.getBitcast(ResultVT, Vec256);
return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
@@ -4255,7 +4254,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
} else
llvm_unreachable("Unexpected vector type");
- return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+ return DAG.getBitcast(VT, Vec);
/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
@@ -4611,7 +4610,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
- return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
+ return DAG.getBitcast(MVT::v16i8, V);
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
@@ -4749,7 +4748,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
DAG.getIntPtrConstant(InsertPSMask, DL));
- return DAG.getNode(ISD::BITCAST, DL, VT, Result);
+ return DAG.getBitcast(VT, Result);
/// Return a vector logical shift node.
@@ -4759,12 +4758,11 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
assert(VT.is128BitVector() && "Unknown type for VShift");
MVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
- SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
+ SrcOp = DAG.getBitcast(ShVT, SrcOp);
MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(SrcOp.getValueType());
assert(NumBits % 8 == 0 && "Only support byte sized shifts");
SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
+ return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
static SDValue
@@ -4949,7 +4947,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
SDValue(ResNode.getNode(), 1));
- return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
+ return DAG.getBitcast(VT, ResNode);
return SDValue();
@@ -5261,8 +5259,8 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
SDValue Imm = ConvertI1VectorToInterger(Op, DAG);
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
- return DAG.getNode(ISD::BITCAST, dl, VT, Imm);
- SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm);
+ return DAG.getBitcast(VT, Imm);
+ SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
DAG.getIntPtrConstant(0, dl));
@@ -5277,7 +5275,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
SDValue In = Op.getOperand(idx);
if (In.getOpcode() == ISD::UNDEF)
- if (!isa<ConstantSDNode>(In))
+ if (!isa<ConstantSDNode>(In))
else {
Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
@@ -5304,12 +5302,12 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
else if (HasConstElts)
Imm = DAG.getConstant(0, dl, VT);
- else
+ else
Imm = DAG.getUNDEF(VT);
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
- DstVec = DAG.getNode(ISD::BITCAST, dl, VT, Imm);
+ DstVec = DAG.getBitcast(VT, Imm);
else {
- SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm);
+ SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
DAG.getIntPtrConstant(0, dl));
@@ -5818,9 +5816,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
- return DAG.getNode(
- getShuffleVectorZeroOrUndef(Item, Idx * 2, true, Subtarget, DAG));
+ return DAG.getBitcast(VT, getShuffleVectorZeroOrUndef(
+ Item, Idx * 2, true, Subtarget, DAG));
@@ -5866,7 +5863,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
- return DAG.getNode(ISD::BITCAST, dl, VT, Item);
+ return DAG.getBitcast(VT, Item);
@@ -6257,6 +6254,42 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
return true;
+/// \brief Test whether a shuffle mask is equivalent within each 256-bit lane.
+/// This checks a shuffle mask to see if it is performing the same
+/// 256-bit lane-relative shuffle in each 256-bit lane. This trivially implies
+/// that it is also not lane-crossing. It may however involve a blend from the
+/// same lane of a second vector.
+/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
+/// non-trivial to compute in the face of undef lanes. The representation is
+/// *not* suitable for use with existing 256-bit shuffles as it will contain
+/// entries from both V1 and V2 inputs to the wider mask.
+static bool
+is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
+ SmallVectorImpl<int> &RepeatedMask) {
+ int LaneSize = 256 / VT.getScalarSizeInBits();
+ RepeatedMask.resize(LaneSize, -1);
+ int Size = Mask.size();
+ for (int i = 0; i < Size; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((Mask[i] % Size) / LaneSize != i / LaneSize)
+ // This entry crosses lanes, so there is no way to model this shuffle.
+ return false;
+ // Ok, handle the in-lane shuffles by detecting if and when they repeat.
+ if (RepeatedMask[i % LaneSize] == -1)
+ // This is the first non-undef entry in this slot of a 256-bit lane.
+ RepeatedMask[i % LaneSize] =
+ Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size;
+ else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize != Mask[i])
+ // Found a mismatch with the repeated mask.
+ return false;
+ }
+ return true;
/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
/// arguments.
@@ -6316,6 +6349,22 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
return DAG.getConstant(Imm, DL, MVT::i8);
+/// \brief Get an 8-bit shuffle, 1 bit per lane, immediate for a mask.
+/// This helper function produces an 8-bit shuffle immediate corresponding to
+/// the ubiquitous shuffle encoding scheme used in x86 instructions for
+/// shuffling 8 lanes.
+static SDValue get1bitLaneShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
+ SelectionDAG &DAG) {
+ assert(Mask.size() <= 8 &&
+ "Up to 8 elts may be in Imm8 1-bit lane shuffle mask");
+ unsigned Imm = 0;
+ for (unsigned i = 0; i < Mask.size(); ++i)
+ if (Mask[i] >= 0)
+ Imm |= (Mask[i] % 2) << i;
+ return DAG.getConstant(Imm, DL, MVT::i8);
/// \brief Try to emit a blend instruction for a shuffle using bit math.
/// This is used as a fallback approach when first class blend instructions are
@@ -6341,10 +6390,9 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1,
V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask);
// We have to cast V2 around.
MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
- V2 = DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::ANDNP, DL, MaskVT,
- DAG.getNode(ISD::BITCAST, DL, MaskVT, V1Mask),
- DAG.getNode(ISD::BITCAST, DL, MaskVT, V2)));
+ V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::ANDNP, DL, MaskVT,
+ DAG.getBitcast(MaskVT, V1Mask),
+ DAG.getBitcast(MaskVT, V2)));
return DAG.getNode(ISD::OR, DL, VT, V1, V2);
@@ -6395,11 +6443,11 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
BlendMask |= 1u << (i * Scale + j);
MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
- V1 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2);
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
- DAG.getConstant(BlendMask, DL, MVT::i8)));
+ V1 = DAG.getBitcast(BlendVT, V1);
+ V2 = DAG.getBitcast(BlendVT, V2);
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
+ DAG.getConstant(BlendMask, DL, MVT::i8)));
case MVT::v8i16: {
@@ -6412,11 +6460,11 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
for (int j = 0; j < Scale; ++j)
BlendMask |= 1u << (i * Scale + j);
- V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2);
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
- DAG.getConstant(BlendMask, DL, MVT::i8)));
+ V1 = DAG.getBitcast(MVT::v8i16, V1);
+ V2 = DAG.getBitcast(MVT::v8i16, V2);
+ return DAG.getBitcast(VT,
+ DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
+ DAG.getConstant(BlendMask, DL, MVT::i8)));
case MVT::v16i16: {
@@ -6465,13 +6513,12 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
: DAG.getConstant(Mask[i] < Size ? -1 : 0, DL,
- V1 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2);
- return DAG.getNode(
- DAG.getNode(ISD::VSELECT, DL, BlendVT,
- V1, V2));
+ V1 = DAG.getBitcast(BlendVT, V1);
+ V2 = DAG.getBitcast(BlendVT, V2);
+ return DAG.getBitcast(VT, DAG.getNode(ISD::VSELECT, DL, BlendVT,
+ BlendVT, VSELECTMask),
+ V1, V2));
@@ -6652,13 +6699,12 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
if (Subtarget->hasSSSE3()) {
// Cast the inputs to i8 vector of correct length to match PALIGNR.
MVT AlignVT = MVT::getVectorVT(MVT::i8, 16 * NumLanes);
- Lo = DAG.getNode(ISD::BITCAST, DL, AlignVT, Lo);
- Hi = DAG.getNode(ISD::BITCAST, DL, AlignVT, Hi);
+ Lo = DAG.getBitcast(AlignVT, Lo);
+ Hi = DAG.getBitcast(AlignVT, Hi);
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo,
- DAG.getConstant(Rotation * Scale, DL,
- MVT::i8)));
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo,
+ DAG.getConstant(Rotation * Scale, DL, MVT::i8)));
assert(VT.getSizeInBits() == 128 &&
@@ -6671,15 +6717,15 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
int HiByteShift = Rotation * Scale;
// Cast the inputs to v2i64 to match PSLLDQ/PSRLDQ.
- Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Lo);
- Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Hi);
+ Lo = DAG.getBitcast(MVT::v2i64, Lo);
+ Hi = DAG.getBitcast(MVT::v2i64, Hi);
SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, Lo,
DAG.getConstant(LoByteShift, DL, MVT::i8));
SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, Hi,
DAG.getConstant(HiByteShift, DL, MVT::i8));
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift));
+ return DAG.getBitcast(VT,
+ DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift));
/// \brief Compute whether each element of a shuffle is zeroable.
@@ -6740,8 +6786,8 @@ static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1,
SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
if (EltVT.isFloatingPoint()) {
- Zero = DAG.getNode(ISD::BITCAST, DL, EltVT, Zero);
- AllOnes = DAG.getNode(ISD::BITCAST, DL, EltVT, AllOnes);
+ Zero = DAG.getBitcast(EltVT, Zero);
+ AllOnes = DAG.getBitcast(EltVT, AllOnes);
SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
@@ -6833,11 +6879,11 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1,
MVT ShiftVT = MVT::getVectorVT(ShiftSVT, Size / Scale);
assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
"Illegal integer vector type");
- V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V);
+ V = DAG.getBitcast(ShiftVT, V);
V = DAG.getNode(OpCode, DL, ShiftVT, V,
DAG.getConstant(ShiftAmt, DL, MVT::i8));
- return DAG.getNode(ISD::BITCAST, DL, VT, V);
+ return DAG.getBitcast(VT, V);
// SSE/AVX supports logical shifts up to 64-bit integers - so we can just
@@ -6878,31 +6924,28 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
if (Subtarget->hasSSE41()) {
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV));
+ return DAG.getBitcast(VT, DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV));
// For any extends we can cheat for larger element sizes and use shuffle
// instructions that can fold with a load and/or copy.
if (AnyExt && EltBits == 32) {
int PSHUFDMask[4] = {0, -1, 1, -1};
- return DAG.getNode(
- DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
- DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV),
- getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
+ DAG.getBitcast(MVT::v4i32, InputV),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
if (AnyExt && EltBits == 16 && Scale > 2) {
int PSHUFDMask[4] = {0, -1, 0, -1};
InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
- DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV),
+ DAG.getBitcast(MVT::v4i32, InputV),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
int PSHUFHWMask[4] = {1, -1, -1, -1};
- return DAG.getNode(
- DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16,
- DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, InputV),
- getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG)));
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16,
+ DAG.getBitcast(MVT::v8i16, InputV),
+ getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG)));
// If this would require more than 2 unpack instructions to expand, use
@@ -6914,11 +6957,11 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
for (int i = 0; i < 16; ++i)
PSHUFBMask[i] =
DAG.getConstant((i % Scale == 0) ? i / Scale : 0x80, DL, MVT::i8);
- InputV = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, InputV);
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
- MVT::v16i8, PSHUFBMask)));
+ InputV = DAG.getBitcast(MVT::v16i8, InputV);
+ return DAG.getBitcast(VT,
+ DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
+ MVT::v16i8, PSHUFBMask)));
// Otherwise emit a sequence of unpacks.
@@ -6926,13 +6969,13 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT)
: getZeroVector(InputVT, Subtarget, DAG, DL);
- InputV = DAG.getNode(ISD::BITCAST, DL, InputVT, InputV);
+ InputV = DAG.getBitcast(InputVT, InputV);
InputV = DAG.getNode(X86ISD::UNPCKL, DL, InputVT, InputV, Ext);
Scale /= 2;
EltBits *= 2;
NumElements /= 2;
} while (Scale > 1);
- return DAG.getNode(ISD::BITCAST, DL, VT, InputV);
+ return DAG.getBitcast(VT, InputV);
/// \brief Try to lower a vector shuffle as a zero extension on any microarch.
@@ -7030,9 +7073,9 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
if (SDValue V = CanZExtLowHalf()) {
- V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V);
+ V = DAG.getBitcast(MVT::v2i64, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V);
- return DAG.getNode(ISD::BITCAST, DL, VT, V);
+ return DAG.getBitcast(VT, V);
// No viable ext lowering found.
@@ -7106,7 +7149,7 @@ static SDValue lowerVectorShuffleAsElementInsertion(
if (SDValue V2S = getScalarValueForVectorElement(
V2, Mask[V2Index] - Mask.size(), DAG)) {
// We need to zext the scalar if it is smaller than an i32.
- V2S = DAG.getNode(ISD::BITCAST, DL, EltVT, V2S);
+ V2S = DAG.getBitcast(EltVT, V2S);
if (EltVT == MVT::i8 || EltVT == MVT::i16) {
// Using zext to expand a narrow element won't work for non-zero
// insertions.
@@ -7155,7 +7198,7 @@ static SDValue lowerVectorShuffleAsElementInsertion(
V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
if (ExtVT != VT)
- V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
+ V2 = DAG.getBitcast(VT, V2);
if (V2Index != 0) {
// If we have 4 or fewer lanes we can cheaply shuffle the element into
@@ -7167,13 +7210,13 @@ static SDValue lowerVectorShuffleAsElementInsertion(
V2Shuffle[V2Index] = 0;
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
} else {
- V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V2);
+ V2 = DAG.getBitcast(MVT::v2i64, V2);
V2 = DAG.getNode(
X86ISD::VSHLDQ, DL, MVT::v2i64, V2,
V2Index * EltVT.getSizeInBits()/8, DL,
- V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
+ V2 = DAG.getBitcast(VT, V2);
return V2;
@@ -7396,13 +7439,13 @@ static SDValue lowerVectorShuffleAsUnpack(SDLoc DL, MVT VT, SDValue V1,
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
// Cast the inputs to the type we will use to unpack them.
- V1 = DAG.getNode(ISD::BITCAST, DL, UnpackVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, UnpackVT, V2);
+ V1 = DAG.getBitcast(UnpackVT, V1);
+ V2 = DAG.getBitcast(UnpackVT, V2);
// Unpack the inputs and cast the result back to the desired type.
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH,
- DL, UnpackVT, V1, V2));
+ return DAG.getBitcast(
+ VT, DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
+ UnpackVT, V1, V2));
// We try each unpack from the largest to the smallest to try and find one
@@ -7558,12 +7601,12 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// Straight shuffle of a single input vector. For everything from SSE2
// onward this has a single fast instruction with no scary immediates.
// We have to map the mask as it is actually a v4i32 shuffle instruction.
- V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V1);
+ V1 = DAG.getBitcast(MVT::v4i32, V1);
int WidenedMask[4] = {
std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1,
std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1};
- return DAG.getNode(
- ISD::BITCAST, DL, MVT::v2i64,
+ return DAG.getBitcast(
+ MVT::v2i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(WidenedMask, DL, DAG)));
@@ -7584,12 +7627,12 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (SDValue V1Pack = GetPackNode(V1))
if (SDValue V2Pack = GetPackNode(V2))
- return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
- DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8,
- Mask[0] == 0 ? V1Pack.getOperand(0)
- : V1Pack.getOperand(1),
- Mask[1] == 2 ? V2Pack.getOperand(0)
- : V2Pack.getOperand(1)));
+ return DAG.getBitcast(MVT::v2i64,
+ DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8,
+ Mask[0] == 0 ? V1Pack.getOperand(0)
+ : V1Pack.getOperand(1),
+ Mask[1] == 2 ? V2Pack.getOperand(0)
+ : V2Pack.getOperand(1)));
// Try to use shift instructions.
if (SDValue Shift =
@@ -7639,10 +7682,10 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
// However, all the alternatives are still more cycles and newer chips don't
// have this problem. It would be really nice if x86 had better shuffles here.
- V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V2);
- return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
- DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
+ V1 = DAG.getBitcast(MVT::v2f64, V1);
+ V2 = DAG.getBitcast(MVT::v2f64, V2);
+ return DAG.getBitcast(MVT::v2i64,
+ DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
/// \brief Test whether this can be lowered with a single SHUFPS instruction.
@@ -7941,11 +7984,10 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// up the inputs, bypassing domain shift penalties that we would encur if we
// directly used PSHUFD on Nehalem and older. For newer chips, this isn't
// relevant.
- return DAG.getNode(ISD::BITCAST, DL, MVT::v4i32,
- DAG.getVectorShuffle(
- MVT::v4f32, DL,
- DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V1),
- DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V2), Mask));
+ return DAG.getBitcast(
+ MVT::v4i32,
+ DAG.getVectorShuffle(MVT::v4f32, DL, DAG.getBitcast(MVT::v4f32, V1),
+ DAG.getBitcast(MVT::v4f32, V2), Mask));
/// \brief Lowering of single-input v8i16 shuffles is the cornerstone of SSE2
@@ -8123,11 +8165,10 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
int PSHUFDMask[] = {0, 1, 2, 3};
PSHUFDMask[ADWord] = BDWord;
PSHUFDMask[BDWord] = ADWord;
- V = DAG.getNode(ISD::BITCAST, DL, VT,
- getV4X86ShuffleImm8ForMask(PSHUFDMask, DL,
- DAG)));
+ V = DAG.getBitcast(
+ VT,
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
// Adjust the mask to match the new locations of A and B.
for (int &M : Mask)
@@ -8368,11 +8409,10 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFHMask, DL, DAG));
if (!isNoopShuffleMask(PSHUFDMask))
- V = DAG.getNode(ISD::BITCAST, DL, VT,
- getV4X86ShuffleImm8ForMask(PSHUFDMask, DL,
- DAG)));
+ V = DAG.getBitcast(
+ VT,
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
// At this point, each half should contain all its inputs, and we can then
// just shuffle them into their final position.
@@ -8433,11 +8473,11 @@ static SDValue lowerVectorShuffleAsPSHUFB(SDLoc DL, MVT VT, SDValue V1,
if (V1InUse)
V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
- DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, V1),
+ DAG.getBitcast(MVT::v16i8, V1),
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask));
if (V2InUse)
V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
- DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, V2),
+ DAG.getBitcast(MVT::v16i8, V2),
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask));
// If we need shuffled inputs from both, blend the two.
@@ -8448,7 +8488,7 @@ static SDValue lowerVectorShuffleAsPSHUFB(SDLoc DL, MVT VT, SDValue V1,
V = V1InUse ? V1 : V2;
// Cast the result back to the correct type.
- return DAG.getNode(ISD::BITCAST, DL, VT, V);
+ return DAG.getBitcast(VT, V);
/// \brief Generic lowering of 8-lane i16 shuffles.
@@ -8749,10 +8789,9 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// Update the lane map based on the mapping we ended up with.
LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2;
- V1 = DAG.getNode(
- ISD::BITCAST, DL, MVT::v16i8,
- DAG.getVectorShuffle(MVT::v8i16, DL,
- DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1),
+ V1 = DAG.getBitcast(
+ MVT::v16i8,
+ DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle));
// Unpack the bytes to form the i16s that will be shuffled into place.
@@ -8770,10 +8809,9 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(PostDupI16Shuffle[i / 2] == MappedMask &&
"Conflicting entrties in the original shuffle!");
- return DAG.getNode(
- ISD::BITCAST, DL, MVT::v16i8,
- DAG.getVectorShuffle(MVT::v8i16, DL,
- DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1),
+ return DAG.getBitcast(
+ MVT::v16i8,
+ DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle));
if (SDValue V = tryToWidenViaDuplication())
@@ -8866,19 +8904,18 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// We use the mask type to pick which bytes are preserved based on how many
// elements are dropped.
MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 };
- SDValue ByteClearMask =
- DAG.getNode(ISD::BITCAST, DL, MVT::v16i8,
- DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1]));
+ SDValue ByteClearMask = DAG.getBitcast(
+ MVT::v16i8, DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1]));
V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask);
if (!IsSingleInput)
V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask);
// Now pack things back together.
- V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1);
- V2 = IsSingleInput ? V1 : DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2);
+ V1 = DAG.getBitcast(MVT::v8i16, V1);
+ V2 = IsSingleInput ? V1 : DAG.getBitcast(MVT::v8i16, V2);
SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1, V2);
for (int i = 1; i < NumEvenDrops; ++i) {
- Result = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, Result);
+ Result = DAG.getBitcast(MVT::v8i16, Result);
Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result);
@@ -8912,7 +8949,7 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
std::none_of(std::begin(HiBlendMask), std::end(HiBlendMask),
[](int M) { return M >= 0 && M % 2 == 1; })) {
// Use a mask to drop the high bytes.
- VLoHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
+ VLoHalf = DAG.getBitcast(MVT::v8i16, V);
VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf,
DAG.getConstant(0x00FF, DL, MVT::v8i16));
@@ -8929,10 +8966,10 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
} else {
// Otherwise just unpack the low half of V into VLoHalf and the high half into
// VHiHalf so that we can blend them as i16s.
- VLoHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
- DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
- VHiHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
- DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
+ VLoHalf = DAG.getBitcast(
+ MVT::v8i16, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
+ VHiHalf = DAG.getBitcast(
+ MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask);
@@ -9073,8 +9110,8 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1,
LoV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, LoOps);
HiV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, HiOps);
- return std::make_pair(DAG.getNode(ISD::BITCAST, DL, SplitVT, LoV),
- DAG.getNode(ISD::BITCAST, DL, SplitVT, HiV));
+ return std::make_pair(DAG.getBitcast(SplitVT, LoV),
+ DAG.getBitcast(SplitVT, HiV));
SDValue LoV1, HiV1, LoV2, HiV2;
@@ -9407,12 +9444,12 @@ static SDValue lowerVectorShuffleByMerging128BitLanes(
LaneMask[2 * i + 1] = 2*Lanes[i] + 1;
- V1 = DAG.getNode(ISD::BITCAST, DL, LaneVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, LaneVT, V2);
+ V1 = DAG.getBitcast(LaneVT, V1);
+ V2 = DAG.getBitcast(LaneVT, V2);
SDValue LaneShuffle = DAG.getVectorShuffle(LaneVT, DL, V1, V2, LaneMask);
// Cast it back to the type we actually want.
- LaneShuffle = DAG.getNode(ISD::BITCAST, DL, VT, LaneShuffle);
+ LaneShuffle = DAG.getBitcast(VT, LaneShuffle);
// Now do a simple shuffle that isn't lane crossing.
SmallVector<int, 8> NewMask;
@@ -9441,6 +9478,37 @@ static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) {
return true;
+static SDValue lowerVectorShuffleWithSHUFPD(SDLoc DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
+ // Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, ..
+ // Mask for V4F64; 0/1, 4/5, 2/3, 6/7..
+ assert(VT.getScalarSizeInBits() == 64 && "Unexpected data type for VSHUFPD");
+ int NumElts = VT.getVectorNumElements();
+ bool ShufpdMask = true;
+ bool CommutableMask = true;
+ unsigned Immediate = 0;
+ for (int i = 0; i < NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ int Val = (i & 6) + NumElts * (i & 1);
+ int CommutVal = (i & 0xe) + NumElts * ((i & 1)^1);
+ if (Mask[i] < Val || Mask[i] > Val + 1)
+ ShufpdMask = false;
+ if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
+ CommutableMask = false;
+ Immediate |= (Mask[i] % 2) << i;
+ }
+ if (ShufpdMask)
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
+ DAG.getConstant(Immediate, DL, MVT::i8));
+ if (CommutableMask)
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
+ DAG.getConstant(Immediate, DL, MVT::i8));
+ return SDValue();
/// \brief Handle lowering of 4-lane 64-bit floating point shuffles.
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
@@ -9505,24 +9573,9 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Check if the blend happens to exactly fit that of SHUFPD.
- if ((Mask[0] == -1 || Mask[0] < 2) &&
- (Mask[1] == -1 || (Mask[1] >= 4 && Mask[1] < 6)) &&
- (Mask[2] == -1 || (Mask[2] >= 2 && Mask[2] < 4)) &&
- (Mask[3] == -1 || Mask[3] >= 6)) {
- unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 5) << 1) |
- ((Mask[2] == 3) << 2) | ((Mask[3] == 7) << 3);
- return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V1, V2,
- DAG.getConstant(SHUFPDMask, DL, MVT::i8));
- }
- if ((Mask[0] == -1 || (Mask[0] >= 4 && Mask[0] < 6)) &&
- (Mask[1] == -1 || Mask[1] < 2) &&
- (Mask[2] == -1 || Mask[2] >= 6) &&
- (Mask[3] == -1 || (Mask[3] >= 2 && Mask[3] < 4))) {
- unsigned SHUFPDMask = (Mask[0] == 5) | ((Mask[1] == 1) << 1) |
- ((Mask[2] == 7) << 2) | ((Mask[3] == 3) << 3);
- return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V2, V1,
- DAG.getConstant(SHUFPDMask, DL, MVT::i8));
- }
+ if (SDValue Op =
+ lowerVectorShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG))
+ return Op;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle. However, if we have AVX2 and either inputs are already in place,
@@ -9584,10 +9637,10 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
PSHUFDMask[2 * i] = 2 * RepeatedMask[i];
PSHUFDMask[2 * i + 1] = 2 * RepeatedMask[i] + 1;
- return DAG.getNode(
- ISD::BITCAST, DL, MVT::v4i64,
+ return DAG.getBitcast(
+ MVT::v4i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
- DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, V1),
+ DAG.getBitcast(MVT::v8i32, V1),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
@@ -9700,11 +9753,11 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask));
if (Subtarget->hasAVX2())
- return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32,
- DAG.getNode(ISD::BITCAST, DL, MVT::v8f32,
+ return DAG.getNode(
+ X86ISD::VPERMV, DL, MVT::v8f32,
+ DAG.getBitcast(MVT::v8f32, DAG.getNode(ISD::BUILD_VECTOR, DL,
MVT::v8i32, VPermMask)),
- V1);
+ V1);
// Otherwise, fall back.
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
@@ -9894,12 +9947,11 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
PSHUFBMask[2 * i] = DAG.getConstant(2 * M, DL, MVT::i8);
PSHUFBMask[2 * i + 1] = DAG.getConstant(2 * M + 1, DL, MVT::i8);
- return DAG.getNode(
- ISD::BITCAST, DL, MVT::v16i16,
- DAG.getNode(
- X86ISD::PSHUFB, DL, MVT::v32i8,
- DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V1),
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask)));
+ return DAG.getBitcast(MVT::v16i16,
+ DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8,
+ DAG.getBitcast(MVT::v32i8, V1),
+ MVT::v32i8, PSHUFBMask)));
// Try to simplify this by merging 128-bit lanes to enable a lane-based
@@ -10039,10 +10091,9 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
- V1 = DAG.getNode(ISD::BITCAST, DL, FpVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, FpVT, V2);
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));
+ V1 = DAG.getBitcast(FpVT, V1);
+ V2 = DAG.getBitcast(FpVT, V2);
+ return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));
switch (VT.SimpleTy) {
@@ -10064,64 +10115,60 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
-static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- SDLoc DL(Op);
- assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
- assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- ArrayRef<int> Mask = SVOp->getMask();
- assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
- // X86 has dedicated unpack instructions that can handle specific blend
- // operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
+static SDValue lowerVectorShuffleWithVALIGN(SDLoc DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
+ assert(VT.getScalarSizeInBits() >= 32 && "Unexpected data type for VALIGN");
+ // VALIGN pattern 2, 3, 4, 5, .. (sequential, shifted right)
+ int AlignVal = -1;
+ for (int i = 0; i < (signed)VT.getVectorNumElements(); ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if (Mask[i] < i)
+ return SDValue();
+ if (AlignVal == -1)
+ AlignVal = Mask[i] - i;
+ else if (Mask[i] - i != AlignVal)
+ return SDValue();
+ }
+ // Vector source operands should be swapped
+ return DAG.getNode(X86ISD::VALIGN, DL, VT, V2, V1,
+ DAG.getConstant(AlignVal, DL, MVT::i8));
-/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
-static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- SDLoc DL(Op);
- assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
- assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- ArrayRef<int> Mask = SVOp->getMask();
- assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane.
- 0, 16, 1, 17, 4, 20, 5, 21,
- // Second 128-bit lane.
- 8, 24, 9, 25, 12, 28, 13, 29}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane.
- 2, 18, 3, 19, 6, 22, 7, 23,
- // Second 128-bit lane.
- 10, 26, 11, 27, 14, 30, 15, 31}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2);
+ assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV");
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG);
+ MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
+ MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
+ SmallVector<SDValue, 32> VPermMask;
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i)
+ VPermMask.push_back(Mask[i] < 0 ? DAG.getUNDEF(MaskEltVT) :
+ DAG.getConstant(Mask[i], DL,MaskEltVT));
+ SDValue MaskNode = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecVT,
+ VPermMask);
+ if (isSingleInputShuffleMask(Mask))
+ return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
+ return DAG.getNode(X86ISD::VPERMV3, DL, VT, MaskNode, V1, V2);
-/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
-static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
+static SDValue lowerV8X64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
- assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
- assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
+ MVT VT = Op.getSimpleValueType();
+ assert((V1.getSimpleValueType() == MVT::v8f64 ||
+ V1.getSimpleValueType() == MVT::v8i64) && "Bad operand type!");
+ assert((V2.getSimpleValueType() == MVT::v8f64 ||
+ V2.getSimpleValueType() == MVT::v8i64) && "Bad operand type!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
@@ -10129,21 +10176,40 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
+ return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
+ return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG);
+ if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG))
+ return Op;
+ if (SDValue Op = lowerVectorShuffleWithSHUFPD(DL, VT, Mask, V1, V2, DAG))
+ return Op;
+ // PERMILPD instruction - mask 0/1, 0/1, 2/3, 2/3, 4/5, 4/5, 6/7, 6/7
+ if (isSingleInputShuffleMask(Mask)) {
+ if (!is128BitLaneCrossingShuffleMask(VT, Mask))
+ return DAG.getNode(X86ISD::VPERMILPI, DL, VT, V1,
+ get1bitLaneShuffleImm8ForMask(Mask, DL, DAG));
+ SmallVector<int, 4> RepeatedMask;
+ if (is256BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
+ return DAG.getNode(X86ISD::VPERMI, DL, VT, V1,
+ getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
+ }
+ return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG);
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
-static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
- assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
- assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
+ assert((V1.getSimpleValueType() == MVT::v16i32 ||
+ V1.getSimpleValueType() == MVT::v16f32) && "Bad operand type!");
+ assert((V2.getSimpleValueType() == MVT::v16i32 ||
+ V2.getSimpleValueType() == MVT::v16f32) && "Bad operand type!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
@@ -10154,16 +10220,39 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
0, 16, 1, 17, 4, 20, 5, 21,
// Second 128-bit lane.
8, 24, 9, 25, 12, 28, 13, 29}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2);
+ return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask,
{// First 128-bit lane.
2, 18, 3, 19, 6, 22, 7, 23,
// Second 128-bit lane.
10, 26, 11, 27, 14, 30, 15, 31}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
+ return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG);
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
+ 12, 12, 14, 14}))
+ return DAG.getNode(X86ISD::MOVSLDUP, DL, VT, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11,
+ 13, 13, 15, 15}))
+ return DAG.getNode(X86ISD::MOVSHDUP, DL, VT, V1);
+ SmallVector<int, 4> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) {
+ if (isSingleInputShuffleMask(Mask)) {
+ unsigned Opc = VT.isInteger() ? X86ISD::PSHUFD : X86ISD::VPERMILPI;
+ return DAG.getNode(Opc, DL, VT, V1,
+ getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
+ }
+ for (int i = 0; i < 4; ++i)
+ if (RepeatedMask[i] >= 16)
+ RepeatedMask[i] -= 12;
+ return lowerVectorShuffleWithSHUFPS(DL, VT, RepeatedMask, V1, V2, DAG);
+ }
+ if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG))
+ return Op;
+ return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG);
/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
@@ -10223,13 +10312,11 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// the requisite ISA extensions for that element type are available.
switch (VT.SimpleTy) {
case MVT::v8f64:
- return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
- case MVT::v16f32:
- return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
case MVT::v8i64:
- return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ return lowerV8X64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v16f32:
case MVT::v16i32:
- return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ return lowerV16X32VectorShuffle(Op, V1, V2, Subtarget, DAG);
case MVT::v32i16:
if (Subtarget->hasBWI())
return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
@@ -10311,10 +10398,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
// Make sure that the new vector type is legal. For example, v2f64 isn't
// legal on SSE1.
if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
- V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask));
+ V1 = DAG.getBitcast(NewVT, V1);
+ V2 = DAG.getBitcast(NewVT, V2);
+ return DAG.getBitcast(
+ VT, DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask));
@@ -10509,12 +10596,11 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
// If Idx is 0, it's cheaper to do a move instead of a pextrw.
if (Idx == 0)
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getNode(ISD::BITCAST, dl,
- MVT::v4i32,
- Op.getOperand(0)),
- Op.getOperand(1)));
+ return DAG.getNode(
+ ISD::TRUNCATE, dl, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getBitcast(MVT::v4i32, Op.getOperand(0)),
+ Op.getOperand(1)));
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
@@ -10538,10 +10624,9 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
User->getValueType(0) != MVT::i32))
return SDValue();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getNode(ISD::BITCAST, dl, MVT::v4i32,
- Op.getOperand(0)),
- Op.getOperand(1));
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
+ DAG.getBitcast(MVT::v4i32, Op.getOperand(0)),
+ Op.getOperand(1));
+ return DAG.getBitcast(MVT::f32, Extract);
if (VT == MVT::i32 || VT == MVT::i64) {
@@ -10655,8 +10740,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
- DAG.getNode(ISD::BITCAST, dl,
- MVT::v4i32, Vec),
+ DAG.getBitcast(MVT::v4i32, Vec),
// Transform it so it match pextrw which produces a 32-bit result.
MVT EltVT = MVT::i32;
@@ -10877,8 +10961,8 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
assert(OpVT.is128BitVector() && "Expected an SSE type!");
- return DAG.getNode(ISD::BITCAST, dl, OpVT,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
+ return DAG.getBitcast(
+ OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt));
// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in
@@ -11670,14 +11754,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
false, false, false, 16);
- SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32,
- DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, XR1),
- CLod0);
+ SDValue Unpck1 =
+ getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0);
SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
false, false, false, 16);
- SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck1);
+ SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
@@ -11685,12 +11768,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
// FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'.
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
- SDValue S2F = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Sub);
+ SDValue S2F = DAG.getBitcast(MVT::v4i32, Sub);
SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32,
S2F, 0x4E, DAG);
Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Shuffle),
- Sub);
+ DAG.getBitcast(MVT::v2f64, Shuffle), Sub);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
@@ -11713,20 +11795,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
+ DAG.getBitcast(MVT::v2f64, Load),
DAG.getIntPtrConstant(0, dl));
// Or the load with the bias.
- SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
- MVT::v2f64, Load)),
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
- MVT::v2f64, Bias)));
- Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or),
- DAG.getIntPtrConstant(0, dl));
+ SDValue Or = DAG.getNode(
+ ISD::OR, dl, MVT::v2i64,
+ DAG.getBitcast(MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Load)),
+ DAG.getBitcast(MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias)));
+ Or =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));
// Subtract the bias.
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
@@ -11805,19 +11886,16 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
if (Subtarget.hasSSE41()) {
EVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16;
// uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
- SDValue VecCstLowBitcast =
- DAG.getNode(ISD::BITCAST, DL, VecI16VT, VecCstLow);
- SDValue VecBitcast = DAG.getNode(ISD::BITCAST, DL, VecI16VT, V);
+ SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow);
+ SDValue VecBitcast = DAG.getBitcast(VecI16VT, V);
// Low will be bitcasted right away, so do not bother bitcasting back to its
// original type.
Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i32));
// uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
// (uint4) 0x53000000, 0xaa);
- SDValue VecCstHighBitcast =
- DAG.getNode(ISD::BITCAST, DL, VecI16VT, VecCstHigh);
- SDValue VecShiftBitcast =
- DAG.getNode(ISD::BITCAST, DL, VecI16VT, HighShift);
+ SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh);
+ SDValue VecShiftBitcast = DAG.getBitcast(VecI16VT, HighShift);
// High will be bitcasted right away, so do not bother bitcasting back to
// its original type.
High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
@@ -11843,11 +11921,11 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
makeArrayRef(&CstFAddArray[0], NumElts));
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
- SDValue HighBitcast = DAG.getNode(ISD::BITCAST, DL, VecFloatVT, High);
+ SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High);
SDValue FHigh =
DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd);
// return (float4) lo + fhi;
- SDValue LowBitcast = DAG.getNode(ISD::BITCAST, DL, VecFloatVT, Low);
+ SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low);
return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
@@ -12103,8 +12181,8 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
- OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
+ OpLo = DAG.getBitcast(HVT, OpLo);
+ OpHi = DAG.getBitcast(HVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
@@ -12189,14 +12267,14 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
if (InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 &&
return Op; // legal, will go to VPMOVB2M, VPMOVW2M
- if ((InVT.is256BitVector() || InVT.is128BitVector())
+ if ((InVT.is256BitVector() || InVT.is128BitVector())
&& InVT.getScalarSizeInBits() <= 16 &&
Subtarget->hasBWI() && Subtarget->hasVLX())
return Op; // legal, will go to VPMOVB2M, VPMOVW2M
if (InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 &&
return Op; // legal, will go to VPMOVD2M, VPMOVQ2M
- if ((InVT.is256BitVector() || InVT.is128BitVector())
+ if ((InVT.is256BitVector() || InVT.is128BitVector())
&& InVT.getScalarSizeInBits() >= 32 &&
Subtarget->hasDQI() && Subtarget->hasVLX())
return Op; // legal, will go to VPMOVB2M, VPMOVQ2M
@@ -12224,7 +12302,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget->hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
- In = DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, In);
+ In = DAG.getBitcast(MVT::v8i32, In);
In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32),
@@ -12235,8 +12313,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
DAG.getIntPtrConstant(0, DL));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(2, DL));
- OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
+ OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
+ OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
static const int ShufMask[] = {0, 2, 4, 6};
return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask);
@@ -12244,7 +12322,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
// On AVX2, v8i32 -> v8i16 becomed PSHUFB.
if (Subtarget->hasInt256()) {
- In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
+ In = DAG.getBitcast(MVT::v32i8, In);
SmallVector<SDValue,32> pshufbMask;
for (unsigned i = 0; i < 2; ++i) {
@@ -12261,14 +12339,14 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, pshufbMask);
In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV);
- In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In);
+ In = DAG.getBitcast(MVT::v4i64, In);
static const int ShufMask[] = {0, 2, -1, -1};
In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64),
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(0, DL));
- return DAG.getNode(ISD::BITCAST, DL, VT, In);
+ return DAG.getBitcast(VT, In);
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
@@ -12277,8 +12355,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
DAG.getIntPtrConstant(4, DL));
- OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpHi);
+ OpLo = DAG.getBitcast(MVT::v16i8, OpLo);
+ OpHi = DAG.getBitcast(MVT::v16i8, OpHi);
// The PSHUFB mask:
static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
@@ -12288,13 +12366,13 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1);
OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1);
- OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
+ OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
+ OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
// The MOVLHPS Mask:
static const int ShufMask2[] = {0, 1, 4, 5};
SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2);
- return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, res);
+ return DAG.getBitcast(MVT::v8i16, res);
// Handle truncation of V256 to V128 using shuffles.
@@ -12310,8 +12388,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// Prepare truncation shuffle mask
for (unsigned i = 0; i != NumElems; ++i)
MaskVec[i] = i * 2;
- SDValue V = DAG.getVectorShuffle(NVT, DL,
- DAG.getNode(ISD::BITCAST, DL, NVT, In),
+ SDValue V = DAG.getVectorShuffle(NVT, DL, DAG.getBitcast(NVT, In),
DAG.getUNDEF(NVT), &MaskVec[0]);
DAG.getIntPtrConstant(0, DL));
@@ -12420,13 +12497,12 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
// For a vector, cast operands to a vector type, perform the logic op,
// and cast the result back to the original value type.
MVT VecVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
- SDValue MaskCasted = DAG.getNode(ISD::BITCAST, dl, VecVT, Mask);
- SDValue Operand = IsFNABS ?
- DAG.getNode(ISD::BITCAST, dl, VecVT, Op0.getOperand(0)) :
- DAG.getNode(ISD::BITCAST, dl, VecVT, Op0);
+ SDValue MaskCasted = DAG.getBitcast(VecVT, Mask);
+ SDValue Operand = IsFNABS ? DAG.getBitcast(VecVT, Op0.getOperand(0))
+ : DAG.getBitcast(VecVT, Op0);
unsigned BitOp = IsFABS ? ISD::AND : IsFNABS ? ISD::OR : ISD::XOR;
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted));
+ return DAG.getBitcast(VT,
+ DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted));
// If not vector, then scalar.
@@ -12591,7 +12667,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget,
// Cast all vectors into TestVT for PTEST.
for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
- VecIns[i] = DAG.getNode(ISD::BITCAST, DL, TestVT, VecIns[i]);
+ VecIns[i] = DAG.getBitcast(TestVT, VecIns[i]);
// If more than one full vectors are evaluated, OR them first before PTEST.
for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) {
@@ -12925,29 +13001,31 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
DAGCombinerInfo &DCI,
unsigned &RefinementSteps,
bool &UseOneConstNR) const {
- // FIXME: We should use instruction latency models to calculate the cost of
- // each potential sequence, but this is very hard to do reliably because
- // at least Intel's Core* chips have variable timing based on the number of
- // significant digits in the divisor and/or sqrt operand.
- if (!Subtarget->useSqrtEst())
- return SDValue();
EVT VT = Op.getValueType();
+ const char *RecipOp;
- // SSE1 has rsqrtss and rsqrtps.
+ // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
// TODO: Add support for AVX512 (v16f32).
// It is likely not profitable to do this for f64 because a double-precision
// rsqrt estimate with refinement on x86 prior to FMA requires at least 16
// instructions: convert to single, rsqrtss, convert back to double, refine
// (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
- (Subtarget->hasAVX() && VT == MVT::v8f32)) {
- RefinementSteps = 1;
- UseOneConstNR = false;
- return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
- }
- return SDValue();
+ if (VT == MVT::f32 && Subtarget->hasSSE1())
+ RecipOp = "sqrtf";
+ else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
+ (VT == MVT::v8f32 && Subtarget->hasAVX()))
+ RecipOp = "vec-sqrtf";
+ else
+ return SDValue();
+ TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
+ UseOneConstNR = false;
+ return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
/// The minimum architected relative accuracy is 2^-12. We need one
@@ -12955,15 +13033,9 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
DAGCombinerInfo &DCI,
unsigned &RefinementSteps) const {
- // FIXME: We should use instruction latency models to calculate the cost of
- // each potential sequence, but this is very hard to do reliably because
- // at least Intel's Core* chips have variable timing based on the number of
- // significant digits in the divisor.
- if (!Subtarget->useReciprocalEst())
- return SDValue();
EVT VT = Op.getValueType();
+ const char *RecipOp;
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
// TODO: Add support for AVX512 (v16f32).
// It is likely not profitable to do this for f64 because a double-precision
@@ -12971,12 +13043,20 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
// 15 instructions: convert to single, rcpss, convert back to double, refine
// (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
- (Subtarget->hasAVX() && VT == MVT::v8f32)) {
- RefinementSteps = ReciprocalEstimateRefinementSteps;
- return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
- }
- return SDValue();
+ if (VT == MVT::f32 && Subtarget->hasSSE1())
+ RecipOp = "divf";
+ else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
+ (VT == MVT::v8f32 && Subtarget->hasAVX()))
+ RecipOp = "vec-divf";
+ else
+ return SDValue();
+ TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
+ return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
/// If we have at least two divisions that use the same divisor, convert to
@@ -13407,8 +13487,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
assert(Subtarget->hasSSE2() && "Don't know how to lower!");
// First cast everything to the right type.
- Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
- Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
+ Op0 = DAG.getBitcast(MVT::v4i32, Op0);
+ Op1 = DAG.getBitcast(MVT::v4i32, Op1);
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations. The lower
@@ -13442,7 +13522,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
- return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ return DAG.getBitcast(VT, Result);
if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
@@ -13451,8 +13531,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
// First cast everything to the right type.
- Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
- Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
+ Op0 = DAG.getBitcast(MVT::v4i32, Op0);
+ Op1 = DAG.getBitcast(MVT::v4i32, Op1);
// Do the compare.
SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
@@ -13465,7 +13545,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
- return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ return DAG.getBitcast(VT, Result);
@@ -13662,7 +13742,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp);
EVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
- VCmp = DAG.getNode(ISD::BITCAST, DL, VCmpVT, VCmp);
+ VCmp = DAG.getBitcast(VCmpVT, VCmp);
SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2);
@@ -13687,12 +13767,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
Op2Scalar = Op2.getOperand(0);
if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
- SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
+ SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
Cond, Op1Scalar, Op2Scalar);
if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
- return DAG.getNode(ISD::BITCAST, DL, VT, newSelect);
- SDValue ExtVec = DAG.getNode(ISD::BITCAST, DL, MVT::v8i1, newSelect);
+ return DAG.getBitcast(VT, newSelect);
+ SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
DAG.getIntPtrConstant(0, DL));
@@ -13975,7 +14055,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr);
MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2);
CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2);
- Curr = DAG.getNode(ISD::BITCAST, dl, CurrVT, Curr);
+ Curr = DAG.getBitcast(CurrVT, Curr);
SDValue SignExt = Curr;
@@ -13993,7 +14073,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
SDValue Sign = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
DAG.getConstant(31, dl, MVT::i8));
SDValue Ext = DAG.getVectorShuffle(CurrVT, dl, SignExt, Sign, {0, 4, 1, 5});
- return DAG.getNode(ISD::BITCAST, dl, VT, Ext);
+ return DAG.getBitcast(VT, Ext);
return SDValue();
@@ -14202,7 +14282,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
// Bitcast the loaded value to a vector of the original element type, in
// the size of the target vector type.
- SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
+ SDValue SlicedVec = DAG.getBitcast(WideVecVT, Res);
unsigned SizeRatio = RegSz / MemSz;
if (Ext == ISD::SEXTLOAD) {
@@ -14227,7 +14307,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
SDValue Shuff = DAG.getVectorShuffle(
WideVecVT, dl, SlicedVec, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
- Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+ Shuff = DAG.getBitcast(RegVT, Shuff);
// Build the arithmetic shift.
unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
@@ -14249,7 +14329,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
// Bitcast to the requested type.
- Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+ Shuff = DAG.getBitcast(RegVT, Shuff);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Shuff;
@@ -14933,7 +15013,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
MVT EltVT = VT.getVectorElementType();
EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
- ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt);
+ ShAmt = DAG.getBitcast(ShVT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
@@ -14959,8 +15039,8 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
// In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
- DAG.getIntPtrConstant(0, dl));
+ DAG.getBitcast(BitcastVT, Mask),
+ DAG.getIntPtrConstant(0, dl));
switch (Op.getOpcode()) {
default: break;
@@ -15017,12 +15097,31 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Op.getOperand(2), Op.getOperand(3));
SDValue Src = Op.getOperand(1);
- SDValue Src0 = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
- SDValue RoundingMode = Op.getOperand(4);
+ SDValue RoundingMode;
+ if (Op.getNumOperands() == 4)
+ RoundingMode = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+ else
+ RoundingMode = Op.getOperand(4);
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ unsigned Round = cast<ConstantSDNode>(RoundingMode)->getZExtValue();
+ return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
+ dl, Op.getValueType(), Src, RoundingMode),
+ Mask, PassThru, Subtarget, DAG);
+ }
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
- Mask, Src0, Subtarget, DAG);
+ Mask, PassThru, Subtarget, DAG);
+ }
+ case INTR_TYPE_1OP_MASK: {
+ SDValue Src = Op.getOperand(1);
+ SDValue Passthru = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
+ Mask, Passthru, Subtarget, DAG);
SDValue Src1 = Op.getOperand(1);
@@ -15069,6 +15168,30 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Mask, PassThru, Subtarget, DAG);
+ case INTR_TYPE_3OP_MASK: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+ SDValue PassThru = Op.getOperand(4);
+ SDValue Mask = Op.getOperand(5);
+ // We specify 2 possible opcodes for intrinsics with rounding modes.
+ // First, we check if the intrinsic may have non-default rounding mode,
+ // (IntrData->Opc1 != 0), then we check the rounding mode operand.
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ SDValue Rnd = Op.getOperand(6);
+ unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
+ return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
+ dl, Op.getValueType(),
+ Src1, Src2, Src3, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
+ }
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ Src1, Src2, Src3),
+ Mask, PassThru, Subtarget, DAG);
+ }
case FMA_OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
@@ -15140,7 +15263,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
DAG.getUNDEF(BitcastVT), CmpMask,
DAG.getIntPtrConstant(0, dl));
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ return DAG.getBitcast(Op.getValueType(), Res);
case COMI: { // Comparison intrinsics
ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
@@ -15176,7 +15299,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDLoc dl(Op);
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
+ DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress,
@@ -15191,7 +15314,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDLoc dl(Op);
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
+ DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
@@ -15211,16 +15334,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
- case Intrinsic::x86_avx512_mask_valign_q_512:
- case Intrinsic::x86_avx512_mask_valign_d_512:
- // Vector source operands are swapped.
- return getVectorMaskingNode(DAG.getNode(X86ISD::VALIGN, dl,
- Op.getValueType(), Op.getOperand(2),
- Op.getOperand(1),
- Op.getOperand(3)),
- Op.getOperand(5), Op.getOperand(4),
- Subtarget, DAG);
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result.
@@ -15289,8 +15402,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
case Intrinsic::x86_avx512_kortestz_w:
case Intrinsic::x86_avx512_kortestc_w: {
unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz_w)? X86::COND_E: X86::COND_B;
- SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1));
- SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2));
+ SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
+ SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
SDValue CC = DAG.getConstant(X86CC, dl, MVT::i8);
SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i1, CC, Test);
@@ -15378,7 +15491,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
// Compute the symbol for the LSDA. We know it'll get emitted later.
MachineFunction &MF = DAG.getMachineFunction();
SDValue Op1 = Op.getOperand(1);
- Op1->dump();
auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal());
MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol(
@@ -15409,7 +15521,7 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
- MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+ MaskInReg = DAG.getBitcast(MaskVT, Mask);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
@@ -15437,7 +15549,7 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
- MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+ MaskInReg = DAG.getBitcast(MaskVT, Mask);
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
@@ -15460,7 +15572,7 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
- MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+ MaskInReg = DAG.getBitcast(MaskVT, Mask);
//SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = {MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops);
@@ -15693,23 +15805,25 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
+ EVT VT = DataToCompress.getValueType();
if (isAllOnes(Mask)) // return just a store
return DAG.getStore(Chain, dl, DataToCompress, Addr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false,
+ VT.getScalarSizeInBits()/8);
- EVT VT = DataToCompress.getValueType();
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
+ DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask,
DataToCompress, DAG.getUNDEF(VT));
return DAG.getStore(Chain, dl, Compressed, Addr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false,
+ VT.getScalarSizeInBits()/8);
SDLoc dl(Op);
@@ -15721,17 +15835,18 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
if (isAllOnes(Mask)) // return just a load
return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false,
- false, 0);
+ false, VT.getScalarSizeInBits()/8);
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
+ DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(),
- false, false, false, 0);
+ false, false, false,
+ VT.getScalarSizeInBits()/8);
SDValue Results[] = {
DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand, PathThru),
@@ -16274,8 +16389,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
-1, 4, -1, 5, -1, 6, -1, 7};
ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- ALo = DAG.getNode(ISD::BITCAST, dl, ExVT, ALo);
- BLo = DAG.getNode(ISD::BITCAST, dl, ExVT, BLo);
+ ALo = DAG.getBitcast(ExVT, ALo);
+ BLo = DAG.getBitcast(ExVT, BLo);
ALo = DAG.getNode(ISD::SRA, dl, ExVT, ALo, DAG.getConstant(8, dl, ExVT));
BLo = DAG.getNode(ISD::SRA, dl, ExVT, BLo, DAG.getConstant(8, dl, ExVT));
@@ -16294,8 +16409,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
-1, 12, -1, 13, -1, 14, -1, 15};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- AHi = DAG.getNode(ISD::BITCAST, dl, ExVT, AHi);
- BHi = DAG.getNode(ISD::BITCAST, dl, ExVT, BHi);
+ AHi = DAG.getBitcast(ExVT, AHi);
+ BHi = DAG.getBitcast(ExVT, BHi);
AHi = DAG.getNode(ISD::SRA, dl, ExVT, AHi, DAG.getConstant(8, dl, ExVT));
BHi = DAG.getNode(ISD::SRA, dl, ExVT, BHi, DAG.getConstant(8, dl, ExVT));
@@ -16323,8 +16438,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
// Now multiply odd parts.
SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds);
- Evens = DAG.getNode(ISD::BITCAST, dl, VT, Evens);
- Odds = DAG.getNode(ISD::BITCAST, dl, VT, Odds);
+ Evens = DAG.getBitcast(VT, Evens);
+ Odds = DAG.getBitcast(VT, Odds);
// Merge the two vectors back together with a shuffle. This expands into 2
// shuffles.
@@ -16352,10 +16467,10 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
// Bit cast to 32-bit vectors for MULUDQ
EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
(VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
- A = DAG.getNode(ISD::BITCAST, dl, MulVT, A);
- B = DAG.getNode(ISD::BITCAST, dl, MulVT, B);
- Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi);
- Bhi = DAG.getNode(ISD::BITCAST, dl, MulVT, Bhi);
+ A = DAG.getBitcast(MulVT, A);
+ B = DAG.getBitcast(MulVT, B);
+ Ahi = DAG.getBitcast(MulVT, Ahi);
+ Bhi = DAG.getBitcast(MulVT, Bhi);
SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
@@ -16417,7 +16532,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
- return DAG.getNode(ISD::BITCAST, dl, VT, CallInfo.first);
+ return DAG.getBitcast(VT, CallInfo.first);
static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
@@ -16455,12 +16570,10 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
(!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
// PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
// => <2 x i64> <ae|cg>
- SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
+ SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
// PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
// => <2 x i64> <bf|dh>
- SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1));
+ SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1));
// Shuffle it back into the right order.
SDValue Highs, Lows;
@@ -16499,16 +16612,16 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
// Return true if the requred (according to Opcode) shift-imm form is natively
// supported by the Subtarget
-static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
+static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
if (VT.getScalarSizeInBits() < 16)
return false;
if (VT.is512BitVector() &&
(VT.getScalarSizeInBits() > 16 || Subtarget->hasBWI()))
return true;
- bool LShift = VT.is128BitVector() ||
+ bool LShift = VT.is128BitVector() ||
(VT.is256BitVector() && Subtarget->hasInt256());
bool AShift = LShift && (Subtarget->hasVLX() ||
@@ -16518,15 +16631,15 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
// The shift amount is a variable, but it is the same for all vector lanes.
// These instrcutions are defined together with shift-immediate.
-bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget,
+bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
return SupportedVectorShiftWithImm(VT, Subtarget, Opcode);
// Return true if the requred (according to Opcode) variable-shift form is
// natively supported by the Subtarget
-static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget,
+static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
if (!Subtarget->hasInt256() || VT.getScalarSizeInBits() < 16)
@@ -16574,7 +16687,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
// Make a large shift.
SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT,
R, ShiftAmt, DAG);
- SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
+ SHL = DAG.getBitcast(VT, SHL);
// Zero out the rightmost bits.
SmallVector<SDValue, 32> V(
NumElts, DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, MVT::i8));
@@ -16585,7 +16698,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
// Make a large shift.
SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT,
R, ShiftAmt, DAG);
- SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
+ SRL = DAG.getBitcast(VT, SRL);
// Zero out the leftmost bits.
SmallVector<SDValue, 32> V(
NumElts, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, MVT::i8));
@@ -16801,7 +16914,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
Op = DAG.getNode(ISD::ADD, dl, VT, Op,
DAG.getConstant(0x3f800000U, dl, VT));
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
+ Op = DAG.getBitcast(MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
@@ -16871,11 +16984,11 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
if (TargetOpcode == X86ISD::MOVSD)
CastVT = MVT::v2i64;
- SDValue BitCast1 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift1);
- SDValue BitCast2 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift2);
+ SDValue BitCast1 = DAG.getBitcast(CastVT, Shift1);
+ SDValue BitCast2 = DAG.getBitcast(CastVT, Shift2);
SDValue Result = getTargetShuffleNode(TargetOpcode, dl, CastVT, BitCast2,
BitCast1, DAG);
- return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ return DAG.getBitcast(VT, Result);
@@ -16931,10 +17044,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z);
SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, R, R);
SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, R, R);
- ALo = DAG.getNode(ISD::BITCAST, dl, ExtVT, ALo);
- AHi = DAG.getNode(ISD::BITCAST, dl, ExtVT, AHi);
- RLo = DAG.getNode(ISD::BITCAST, dl, ExtVT, RLo);
- RHi = DAG.getNode(ISD::BITCAST, dl, ExtVT, RHi);
+ ALo = DAG.getBitcast(ExtVT, ALo);
+ AHi = DAG.getBitcast(ExtVT, AHi);
+ RLo = DAG.getBitcast(ExtVT, RLo);
+ RHi = DAG.getBitcast(ExtVT, RHi);
SDValue Lo = DAG.getNode(Op.getOpcode(), dl, ExtVT, RLo, ALo);
SDValue Hi = DAG.getNode(Op.getOpcode(), dl, ExtVT, RHi, AHi);
Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT));
@@ -17293,7 +17406,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Elts);
- SDValue ToV2F64 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, BV);
+ SDValue ToV2F64 = DAG.getBitcast(MVT::v2f64, BV);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64,
DAG.getIntPtrConstant(0, dl));
@@ -17315,141 +17428,241 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
return SDValue();
-static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- SDNode *Node = Op.getNode();
- SDLoc dl(Node);
+/// Compute the horizontal sum of bytes in V for the elements of VT.
+/// Requires V to be a byte vector and VT to be an integer vector type with
+/// wider elements than V's type. The width of the elements of VT determines
+/// how many bytes of V are summed horizontally to produce each element of the
+/// result.
+static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(V);
+ MVT ByteVecVT = V.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ int NumElts = VT.getVectorNumElements();
+ assert(ByteVecVT.getVectorElementType() == MVT::i8 &&
+ "Expected value to have byte element type.");
+ assert(EltVT != MVT::i8 &&
+ "Horizontal byte sum only makes sense for wider elements!");
+ unsigned VecSize = VT.getSizeInBits();
+ assert(ByteVecVT.getSizeInBits() == VecSize && "Cannot change vector size!");
+ // The PSADBW instruction horizontally adds all bytes and leaves the result
+ // in i64 chunks, thus directly computing the pop count for v2i64 and v4i64.
+ if (EltVT == MVT::i64) {
+ SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
+ V = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, V, Zeros);
+ return DAG.getBitcast(VT, V);
+ }
+ if (EltVT == MVT::i32) {
+ // We unpack the low half and high half into i32s interleaved with zeros so
+ // that we can use PSADBW to horizontally sum them. The most useful part of
+ // this is that it lines up the results of two PSADBW instructions to be
+ // two v2i64 vectors which concatenated are the 4 population counts. We can
+ // then use PACKUSWB to shrink and concatenate them into a v4i32 again.
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, DL);
+ SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V, Zeros);
+ SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V, Zeros);
+ // Do the horizontal sums into two v2i64s.
+ Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
+ Low = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT,
+ DAG.getBitcast(ByteVecVT, Low), Zeros);
+ High = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT,
+ DAG.getBitcast(ByteVecVT, High), Zeros);
+ // Merge them together.
+ MVT ShortVecVT = MVT::getVectorVT(MVT::i16, VecSize / 16);
+ V = DAG.getNode(X86ISD::PACKUS, DL, ByteVecVT,
+ DAG.getBitcast(ShortVecVT, Low),
+ DAG.getBitcast(ShortVecVT, High));
+ return DAG.getBitcast(VT, V);
+ }
+ // The only element type left is i16.
+ assert(EltVT == MVT::i16 && "Unknown how to handle type");
+ // To obtain pop count for each i16 element starting from the pop count for
+ // i8 elements, shift the i16s left by 8, sum as i8s, and then shift as i16s
+ // right by 8. It is important to shift as i16s as i8 vector shift isn't
+ // directly supported.
+ SmallVector<SDValue, 16> Shifters(NumElts, DAG.getConstant(8, DL, EltVT));
+ SDValue Shifter = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Shifters);
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), Shifter);
+ V = DAG.getNode(ISD::ADD, DL, ByteVecVT, DAG.getBitcast(ByteVecVT, Shl),
+ DAG.getBitcast(ByteVecVT, V));
+ return DAG.getNode(ISD::SRL, DL, VT, DAG.getBitcast(VT, V), Shifter);
+static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, SDLoc DL,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ unsigned VecSize = VT.getSizeInBits();
- Op = Op.getOperand(0);
- EVT VT = Op.getValueType();
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
- "CTPOP lowering only implemented for 128/256-bit wide vector types");
+ // Implement a lookup table in register by using an algorithm based on:
+ //
+ //
+ // The general idea is that every lower byte nibble in the input vector is an
+ // index into an in-register pre-computed pop count table. We then split the
+ // input vector into two new ones: (1) a vector with only the shifted-right
+ // higher nibbles for each byte and (2) a vector with the lower nibbles (and
+ // masked out higher ones) for each byte. PSHUFB is used separately with both
+ // to index the in-register table. Next, both are added and the result is an
+ // i8 vector where each element contains the pop count for its input byte.
+ //
+ // To obtain the pop count for elements != i8, we follow up with the same
+ // approach and use additional tricks as described below.
+ //
+ const int LUT[16] = {/* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2,
+ /* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3,
+ /* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3,
+ /* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4};
+ int NumByteElts = VecSize / 8;
+ MVT ByteVecVT = MVT::getVectorVT(MVT::i8, NumByteElts);
+ SDValue In = DAG.getBitcast(ByteVecVT, Op);
+ SmallVector<SDValue, 16> LUTVec;
+ for (int i = 0; i < NumByteElts; ++i)
+ LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
+ SDValue InRegLUT = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, LUTVec);
+ SmallVector<SDValue, 16> Mask0F(NumByteElts,
+ DAG.getConstant(0x0F, DL, MVT::i8));
+ SDValue M0F = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, Mask0F);
+ // High nibbles
+ SmallVector<SDValue, 16> Four(NumByteElts, DAG.getConstant(4, DL, MVT::i8));
+ SDValue FourV = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, Four);
+ SDValue HighNibbles = DAG.getNode(ISD::SRL, DL, ByteVecVT, In, FourV);
+ // Low nibbles
+ SDValue LowNibbles = DAG.getNode(ISD::AND, DL, ByteVecVT, In, M0F);
+ // The input vector is used as the shuffle mask that index elements into the
+ // LUT. After counting low and high nibbles, add the vector to obtain the
+ // final pop count per i8 element.
+ SDValue HighPopCnt =
+ DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, HighNibbles);
+ SDValue LowPopCnt =
+ DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, LowNibbles);
+ SDValue PopCnt = DAG.getNode(ISD::ADD, DL, ByteVecVT, HighPopCnt, LowPopCnt);
- unsigned NumElts = VT.getVectorNumElements();
- EVT EltVT = VT.getVectorElementType();
- unsigned Len = EltVT.getSizeInBits();
+ if (EltVT == MVT::i8)
+ return PopCnt;
+ return LowerHorizontalByteSum(PopCnt, VT, Subtarget, DAG);
+static SDValue LowerVectorCTPOPBitmath(SDValue Op, SDLoc DL,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.is128BitVector() &&
+ "Only 128-bit vector bitmath lowering supported.");
+ int VecSize = VT.getSizeInBits();
+ MVT EltVT = VT.getVectorElementType();
+ int Len = EltVT.getSizeInBits();
// This is the vectorized version of the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// with a minor tweak to use a series of adds + shifts instead of vector
- // multiplications. Implemented for the v2i64, v4i64, v4i32, v8i32 types:
- //
- // v2i64, v4i64, v4i32 => Only profitable w/ popcnt disabled
- // v8i32 => Always profitable
- //
- // FIXME: There a couple of possible improvements:
- //
- // 1) Support for i8 and i16 vectors (needs measurements if popcnt enabled).
- // 2) Use strategies from
- //
- assert(EltVT.isInteger() && (Len == 32 || Len == 64) && Len % 8 == 0 &&
- "CTPOP not implemented for this vector element type.");
+ // multiplications. Implemented for all integer vector types. We only use
+ // this when we don't have SSSE3 which allows a LUT-based lowering that is
+ // much faster, even faster than using native popcnt instructions.
+ auto GetShift = [&](unsigned OpCode, SDValue V, int Shifter) {
+ MVT VT = V.getSimpleValueType();
+ SmallVector<SDValue, 32> Shifters(
+ VT.getVectorNumElements(),
+ DAG.getConstant(Shifter, DL, VT.getVectorElementType()));
+ return DAG.getNode(OpCode, DL, VT, V,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Shifters));
+ };
+ auto GetMask = [&](SDValue V, APInt Mask) {
+ MVT VT = V.getSimpleValueType();
+ SmallVector<SDValue, 32> Masks(
+ VT.getVectorNumElements(),
+ DAG.getConstant(Mask, DL, VT.getVectorElementType()));
+ return DAG.getNode(ISD::AND, DL, VT, V,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Masks));
+ };
- // X86 canonicalize ANDs to vXi64, generate the appropriate bitcasts to avoid
- // extra legalization.
- bool NeedsBitcast = EltVT == MVT::i32;
- MVT BitcastVT = VT.is256BitVector() ? MVT::v4i64 : MVT::v2i64;
+ // We don't want to incur the implicit masks required to SRL vNi8 vectors on
+ // x86, so set the SRL type to have elements at least i16 wide. This is
+ // correct because all of our SRLs are followed immediately by a mask anyways
+ // that handles any bits that sneak into the high bits of the byte elements.
+ MVT SrlVT = Len > 8 ? VT : MVT::getVectorVT(MVT::i16, VecSize / 16);
- SDValue Cst55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl,
- EltVT);
- SDValue Cst33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl,
- EltVT);
- SDValue Cst0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl,
- EltVT);
+ SDValue V = Op;
// v = v - ((v >> 1) & 0x55555555...)
- SmallVector<SDValue, 8> Ones(NumElts, DAG.getConstant(1, dl, EltVT));
- SDValue OnesV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ones);
- SDValue Srl = DAG.getNode(ISD::SRL, dl, VT, Op, OnesV);
- if (NeedsBitcast)
- Srl = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Srl);
- SmallVector<SDValue, 8> Mask55(NumElts, Cst55);
- SDValue M55 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask55);
- if (NeedsBitcast)
- M55 = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M55);
- SDValue And = DAG.getNode(ISD::AND, dl, Srl.getValueType(), Srl, M55);
- if (VT != And.getValueType())
- And = DAG.getNode(ISD::BITCAST, dl, VT, And);
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Op, And);
+ SDValue Srl =
+ DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 1));
+ SDValue And = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x55)));
+ V = DAG.getNode(ISD::SUB, DL, VT, V, And);
// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
- SmallVector<SDValue, 8> Mask33(NumElts, Cst33);
- SDValue M33 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask33);
- SmallVector<SDValue, 8> Twos(NumElts, DAG.getConstant(2, dl, EltVT));
- SDValue TwosV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Twos);
+ SDValue AndLHS = GetMask(V, APInt::getSplat(Len, APInt(8, 0x33)));
+ Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 2));
+ SDValue AndRHS = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x33)));
+ V = DAG.getNode(ISD::ADD, DL, VT, AndLHS, AndRHS);
- Srl = DAG.getNode(ISD::SRL, dl, VT, Sub, TwosV);
- if (NeedsBitcast) {
- Srl = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Srl);
- M33 = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M33);
- Sub = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Sub);
- }
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 4));
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, V, Srl);
+ V = GetMask(Add, APInt::getSplat(Len, APInt(8, 0x0F)));
- SDValue AndRHS = DAG.getNode(ISD::AND, dl, M33.getValueType(), Srl, M33);
- SDValue AndLHS = DAG.getNode(ISD::AND, dl, M33.getValueType(), Sub, M33);
- if (VT != AndRHS.getValueType()) {
- AndRHS = DAG.getNode(ISD::BITCAST, dl, VT, AndRHS);
- AndLHS = DAG.getNode(ISD::BITCAST, dl, VT, AndLHS);
- }
- SDValue Add = DAG.getNode(ISD::ADD, dl, VT, AndLHS, AndRHS);
+ // At this point, V contains the byte-wise population count, and we are
+ // merely doing a horizontal sum if necessary to get the wider element
+ // counts.
+ if (EltVT == MVT::i8)
+ return V;
- // v = (v + (v >> 4)) & 0x0F0F0F0F...
- SmallVector<SDValue, 8> Fours(NumElts, DAG.getConstant(4, dl, EltVT));
- SDValue FoursV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Fours);
- Srl = DAG.getNode(ISD::SRL, dl, VT, Add, FoursV);
- Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl);
- SmallVector<SDValue, 8> Mask0F(NumElts, Cst0F);
- SDValue M0F = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask0F);
- if (NeedsBitcast) {
- Add = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Add);
- M0F = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M0F);
- }
- And = DAG.getNode(ISD::AND, dl, M0F.getValueType(), Add, M0F);
- if (VT != And.getValueType())
- And = DAG.getNode(ISD::BITCAST, dl, VT, And);
- // The algorithm mentioned above uses:
- // v = (v * 0x01010101...) >> (Len - 8)
- //
- // Change it to use vector adds + vector shifts which yield faster results on
- // Haswell than using vector integer multiplication.
- //
- // For i32 elements:
- // v = v + (v >> 8)
- // v = v + (v >> 16)
- //
- // For i64 elements:
- // v = v + (v >> 8)
- // v = v + (v >> 16)
- // v = v + (v >> 32)
- //
- Add = And;
- SmallVector<SDValue, 8> Csts;
- for (unsigned i = 8; i <= Len/2; i *= 2) {
- Csts.assign(NumElts, DAG.getConstant(i, dl, EltVT));
- SDValue CstsV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Csts);
- Srl = DAG.getNode(ISD::SRL, dl, VT, Add, CstsV);
- Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl);
- Csts.clear();
+ return LowerHorizontalByteSum(
+ DAG.getBitcast(MVT::getVectorVT(MVT::i8, VecSize / 8), V), VT, Subtarget,
+ DAG);
+static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ // FIXME: Need to add AVX-512 support here!
+ assert((VT.is256BitVector() || VT.is128BitVector()) &&
+ "Unknown CTPOP type to handle");
+ SDLoc DL(Op.getNode());
+ SDValue Op0 = Op.getOperand(0);
+ if (!Subtarget->hasSSSE3()) {
+ // We can't use the fast LUT approach, so fall back on vectorized bitmath.
+ assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!");
+ return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG);
- // The result is on the least significant 6-bits on i32 and 7-bits on i64.
- SDValue Cst3F = DAG.getConstant(APInt(Len, Len == 32 ? 0x3F : 0x7F), dl,
- EltVT);
- SmallVector<SDValue, 8> Cst3FV(NumElts, Cst3F);
- SDValue M3F = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Cst3FV);
- if (NeedsBitcast) {
- Add = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Add);
- M3F = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M3F);
+ if (VT.is256BitVector() && !Subtarget->hasInt256()) {
+ unsigned NumElems = VT.getVectorNumElements();
+ // Extract each 128-bit vector, compute pop count and concat the result.
+ SDValue LHS = Extract128BitVector(Op0, 0, DAG, DL);
+ SDValue RHS = Extract128BitVector(Op0, NumElems/2, DAG, DL);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
+ LowerVectorCTPOPInRegLUT(LHS, DL, Subtarget, DAG),
+ LowerVectorCTPOPInRegLUT(RHS, DL, Subtarget, DAG));
- And = DAG.getNode(ISD::AND, dl, M3F.getValueType(), Add, M3F);
- if (VT != And.getValueType())
- And = DAG.getNode(ISD::BITCAST, dl, VT, And);
- return And;
+ return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG);
+static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ assert(Op.getValueType().isVector() &&
+ "We only do custom lowering for vector population count.");
+ return LowerVectorCTPOP(Op, Subtarget, DAG);
static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
@@ -17840,8 +18053,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias);
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, VBias));
- Or = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or);
+ DAG.getBitcast(MVT::v2i64, VBias));
+ Or = DAG.getBitcast(MVT::v2f64, Or);
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
@@ -17964,7 +18177,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
MVT::v2f64, N->getOperand(0));
- SDValue ToVecInt = DAG.getNode(ISD::BITCAST, dl, WiderVT, Expanded);
+ SDValue ToVecInt = DAG.getBitcast(WiderVT, Expanded);
if (ExperimentalVectorWideningLegalization) {
// If we are legalizing vectors by widening, we already have the desired
@@ -17994,7 +18207,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FANDN: return "X86ISD::FANDN";
case X86ISD::FOR: return "X86ISD::FOR";
case X86ISD::FXOR: return "X86ISD::FXOR";
- case X86ISD::FSRL: return "X86ISD::FSRL";
case X86ISD::FILD: return "X86ISD::FILD";
case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
@@ -18121,6 +18333,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
case X86ISD::SHUFP: return "X86ISD::SHUFP";
+ case X86ISD::SHUF128: return "X86ISD::SHUF128";
case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
@@ -18143,8 +18356,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
+ case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
+ case X86ISD::VRANGE: return "X86ISD::VRANGE";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::PMULDQ: return "X86ISD::PMULDQ";
+ case X86ISD::PSADBW: return "X86ISD::PSADBW";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
@@ -18184,6 +18400,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND";
case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND";
case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
+ case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND";
+ case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
case X86ISD::ADDS: return "X86ISD::ADDS";
case X86ISD::SUBS: return "X86ISD::SUBS";
@@ -18193,7 +18411,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();
Reloc::Model R = getTargetMachine().getRelocationModel();
@@ -20028,7 +20247,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
SDValue(ResNode.getNode(), 1));
- return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
+ return DAG.getBitcast(VT, ResNode);
@@ -20087,7 +20306,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// Just remove no-op shuffle masks.
if (Mask.size() == 1) {
- DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Input),
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
/*AddTo*/ true);
return true;
@@ -20123,14 +20342,14 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
if (Depth == 1 && Root->getOpcode() == Shuffle)
return false; // Nothing to do!
- Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ Op = DAG.getBitcast(ShuffleVT, Input);
if (Shuffle == X86ISD::MOVDDUP)
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
- DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op),
/*AddTo*/ true);
return true;
@@ -20141,11 +20360,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
MVT ShuffleVT = MVT::v4f32;
if (Depth == 1 && Root->getOpcode() == Shuffle)
return false; // Nothing to do!
- Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ Op = DAG.getBitcast(ShuffleVT, Input);
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
- DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op),
/*AddTo*/ true);
return true;
@@ -20155,11 +20374,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
MVT ShuffleVT = MVT::v4f32;
if (Depth == 1 && Root->getOpcode() == Shuffle)
return false; // Nothing to do!
- Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ Op = DAG.getBitcast(ShuffleVT, Input);
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
- DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op),
/*AddTo*/ true);
return true;
@@ -20189,11 +20408,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
llvm_unreachable("Impossible mask size!");
- Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ Op = DAG.getBitcast(ShuffleVT, Input);
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
- DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op),
/*AddTo*/ true);
return true;
@@ -20222,14 +20441,14 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8));
MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes);
- Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Input);
+ Op = DAG.getBitcast(ByteVT, Input);
SDValue PSHUFBMaskOp =
Op = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Op, PSHUFBMaskOp);
- DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op),
/*AddTo*/ true);
return true;
@@ -20401,7 +20620,7 @@ static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
#ifndef NDEBUG
for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i)
for (int j = 0; j < LaneElts; ++j)
- assert(Mask[j] == Mask[i * LaneElts + j] - LaneElts &&
+ assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) &&
"Mask doesn't repeat in high 128-bit lanes!");
@@ -20532,7 +20751,7 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
SDValue W = Chain.pop_back_val();
if (V.getValueType() != W.getOperand(0).getValueType())
- V = DAG.getNode(ISD::BITCAST, DL, W.getOperand(0).getValueType(), V);
+ V = DAG.getBitcast(W.getOperand(0).getValueType(), V);
switch (W.getOpcode()) {
@@ -20551,7 +20770,7 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
if (V.getValueType() != N.getValueType())
- V = DAG.getNode(ISD::BITCAST, DL, N.getValueType(), V);
+ V = DAG.getBitcast(N.getValueType(), V);
// Return the new chain to replace N.
return V;
@@ -20668,12 +20887,12 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
DMask[DOffset + 0] = DOffset + 1;
DMask[DOffset + 1] = DOffset + 0;
MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
- V = DAG.getNode(ISD::BITCAST, DL, DVT, V);
+ V = DAG.getBitcast(DVT, V);
V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V,
getV4X86ShuffleImm8ForMask(DMask, DL, DAG));
- return DAG.getNode(ISD::BITCAST, DL, VT, V);
+ return DAG.getBitcast(VT, V);
// Look for shuffle patterns which can be implemented as a single unpack.
@@ -20704,7 +20923,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) {
// We can replace all three shuffles with an unpack.
- V = DAG.getNode(ISD::BITCAST, DL, VT, D.getOperand(0));
+ V = DAG.getBitcast(VT, D.getOperand(0));
return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
@@ -20848,8 +21067,8 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
CanFold = SVOp->getMaskElt(i) < 0;
if (CanFold) {
- SDValue BC00 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(0));
- SDValue BC01 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(1));
+ SDValue BC00 = DAG.getBitcast(VT, BC0.getOperand(0));
+ SDValue BC01 = DAG.getBitcast(VT, BC0.getOperand(1));
SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01);
return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, &SVOp->getMask()[0]);
@@ -20981,7 +21200,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
Shuffle = DAG.getVectorShuffle(CurrentVT, dl,
InVec.getOperand(0), Shuffle,
- Shuffle = DAG.getNode(ISD::BITCAST, dl, OriginalVT, Shuffle);
+ Shuffle = DAG.getBitcast(OriginalVT, Shuffle);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
@@ -21101,7 +21320,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
SDValue Vals[4];
if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) {
- SDValue Cst = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, InputVector);
+ SDValue Cst = DAG.getBitcast(MVT::v2i64, InputVector);
EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy();
SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
DAG.getConstant(0, dl, VecIdxTy));
@@ -21717,13 +21936,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (TValIsAllOnes && FValIsAllZeros)
Ret = Cond;
else if (TValIsAllOnes)
- Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond,
- DAG.getNode(ISD::BITCAST, DL, CondVT, RHS));
+ Ret =
+ DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS));
else if (FValIsAllZeros)
Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond,
- DAG.getNode(ISD::BITCAST, DL, CondVT, LHS));
+ DAG.getBitcast(CondVT, LHS));
- return DAG.getNode(ISD::BITCAST, DL, VT, Ret);
+ return DAG.getBitcast(VT, Ret);
@@ -22554,15 +22773,13 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
// and work with those going forward.
SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
- SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32,
- Vector64);
+ SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64);
OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
Vector32, DAG.getIntPtrConstant(0, DL));
IntVT = MVT::i32;
- SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT,
- OnesOrZeroesF);
+ SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF);
SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
DAG.getConstant(1, DL, IntVT));
SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
@@ -22775,7 +22992,7 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG,
SDValue NewShuffle = DAG.getVectorShuffle(Shuffle->getValueType(0), DL,
Shuffle->getOperand(0), DAG.getConstant(0, DL, SrcType), Mask);
- return DAG.getNode(ISD::BITCAST, DL, N0.getValueType(), NewShuffle);
+ return DAG.getBitcast(N0.getValueType(), NewShuffle);
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
@@ -22916,7 +23133,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
"Unsupported VT for PSIGN");
Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
- return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
+ return DAG.getBitcast(VT, Mask);
// PBLENDVB only available on SSE 4.1
if (!Subtarget->hasSSE41())
@@ -22924,11 +23141,11 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
- X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
- Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
- Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
+ X = DAG.getBitcast(BlendVT, X);
+ Y = DAG.getBitcast(BlendVT, Y);
+ Mask = DAG.getBitcast(BlendVT, Mask);
Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X);
- return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
+ return DAG.getBitcast(VT, Mask);
@@ -23129,7 +23346,7 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
// Convert Src0 value
- SDValue WideSrc0 = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mld->getSrc0());
+ SDValue WideSrc0 = DAG.getBitcast(WideVecVT, Mld->getSrc0());
if (Mld->getSrc0().getOpcode() != ISD::UNDEF) {
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
@@ -23146,7 +23363,7 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
SDValue Mask = Mld->getMask();
if (Mask.getValueType() == VT) {
// Mask and original value have the same type
- NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
+ NewMask = DAG.getBitcast(WideVecVT, Mask);
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
@@ -23214,7 +23431,7 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
- SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mst->getValue());
+ SDValue WideVec = DAG.getBitcast(WideVecVT, Mst->getValue());
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
@@ -23231,7 +23448,7 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue Mask = Mst->getMask();
if (Mask.getValueType() == VT) {
// Mask and original value have the same type
- NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
+ NewMask = DAG.getBitcast(WideVecVT, Mask);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
@@ -23323,7 +23540,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
- SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue());
+ SDValue WideVec = DAG.getBitcast(WideVecVT, St->getValue());
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
@@ -23354,7 +23571,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
StoreType, VT.getSizeInBits()/StoreType.getSizeInBits());
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
- SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff);
+ SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, dl,
@@ -23495,7 +23712,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue ExtOp0 = OldExtract.getOperand(0);
unsigned VecSize = ExtOp0.getValueSizeInBits();
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VecSize / 64);
- SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtOp0);
+ SDValue BitCast = DAG.getBitcast(VecVT, ExtOp0);
SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
BitCast, OldExtract.getOperand(1));
return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(),
@@ -24239,10 +24456,10 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
// DAG.
SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
// The AND node needs bitcasts to/from an integer vector type around it.
- SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
+ SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
N->getOperand(0)->getOperand(0), MaskConst);
- SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
+ SDValue Res = DAG.getBitcast(VT, NewAnd);
return Res;
@@ -24442,8 +24659,7 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
// In this case, the inner vzext is completely dead because we're going to
// only look at bits inside of the low element. Just do the outer vzext on
// a bitcast of the input to the inner.
- return DAG.getNode(X86ISD::VZEXT, DL, VT,
- DAG.getNode(ISD::BITCAST, DL, OpVT, V));
+ return DAG.getNode(X86ISD::VZEXT, DL, VT, DAG.getBitcast(OpVT, V));
// Check if we can bypass extracting and re-inserting an element of an input
@@ -24465,7 +24681,7 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
DAG.getIntPtrConstant(0, DL));
- Op = DAG.getNode(ISD::BITCAST, DL, OpVT, OrigV);
+ Op = DAG.getBitcast(OpVT, OrigV);
return DAG.getNode(X86ISD::VZEXT, DL, VT, Op);
@@ -25301,6 +25517,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Res.first = DestReg;
Res.second = &X86::GR64RegClass;
+ } else if (VT != MVT::Other) {
+ // Type mismatch and not a clobber: Return an error.
+ Res.first = 0;
+ Res.second = nullptr;
} else if (Res.second == &X86::FR32RegClass ||
Res.second == &X86::FR64RegClass ||
@@ -25326,13 +25546,23 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Res.second = &X86::VR256RegClass;
else if (X86::VR512RegClass.hasType(VT))
Res.second = &X86::VR512RegClass;
+ else if (VT != MVT::Other) {
+ // Type mismatch and not a clobber: Return an error.
+ Res.first = 0;
+ Res.second = nullptr;
+ }
+ } else if (VT != MVT::Other) {
+ // Type mismatch and not a clobber: Return an error.
+ Res.first = 0;
+ Res.second = nullptr;
return Res;
int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
// Scaling factors are not free at all.
// An indexed folded instruction, i.e., inst (reg1, reg2, scale),
// will take 2 allocations in the out of order engine instead of 1
@@ -25351,7 +25581,7 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
// E.g., on Haswell:
// vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
// vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
- if (isLegalAddressingMode(AM, Ty))
+ if (isLegalAddressingMode(AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1
// as soon as we use a second register.
return AM.Scale != 0;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index b589ca4..b5d062f 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -56,10 +56,6 @@ namespace llvm {
/// corresponds to X86::ANDNPS or X86::ANDNPD.
- /// Bitwise logical right shift of floating point values. This
- /// corresponds to X86::PSRLDQ.
/// These operations represent an abstract X86 call
/// instruction, which includes a bunch of information. In particular the
/// operands of these node are:
@@ -184,6 +180,9 @@ namespace llvm {
/// Shuffle 16 8-bit values within a vector.
+ /// Compute Sum of Absolute Differences.
/// Bitwise Logical AND NOT of Packed FP values.
@@ -200,6 +199,7 @@ namespace llvm {
/// Combined add and sub on an FP vector.
// FP vector ops with rounding mode.
@@ -207,7 +207,11 @@ namespace llvm {
+ // FP vector get exponent
// Integer add/sub with unsigned saturation.
@@ -355,6 +359,8 @@ namespace llvm {
+ //Shuffle Packed Values at 128-bit granularity
+ SHUF128,
@@ -374,6 +380,10 @@ namespace llvm {
+ //Fix Up Special Packed Float32/64 values
+ //Range Restriction Calculation For Packed Pairs of Float32/64 values
// Broadcast scalar to vector
// Broadcast subvector to vector
@@ -729,7 +739,8 @@ namespace llvm {
/// Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
/// Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
@@ -748,7 +759,8 @@ namespace llvm {
/// of the specified type.
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns a negative value.
- int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override;
+ int getScalingFactorCost(const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
bool isVectorShiftByScalarCheap(Type *Ty) const override;
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 9d11d3c..c1d0aef 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -1047,12 +1047,6 @@ multiclass avx512_permil<bits<8> OpcImm, bits<8> OpcVar, X86VectorVTInfo _,
-defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", X86VPermi, v8i64_info>,
- EVEX_V512, VEX_W;
-defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", X86VPermi, v8f64_info>,
- EVEX_V512, VEX_W;
defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>,
defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>,
@@ -1063,37 +1057,6 @@ def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
(VPERMILPDZri VR512:$src1, imm:$imm)>;
-// -- VPERM - register form --
-multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
- PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
- def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (OpVT (X86VPermv RC:$src1, RC:$src2)))]>, EVEX_4V;
- def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>,
- EVEX_4V;
-defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, loadv16i32, i512mem,
- v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, loadv8i64, i512mem,
- v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-let ExeDomain = SSEPackedSingle in
-defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, loadv16f32, f512mem,
- v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-let ExeDomain = SSEPackedDouble in
-defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, loadv8f64, f512mem,
- v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// -- VPERM2I - 3 source operands form --
multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
PatFrag mem_frag, X86MemOperand x86memop,
@@ -3401,32 +3364,6 @@ defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
VR512, loadv8i64, i512mem>, EVEX_V512,
-// AVX-512 - PSHUFD
-multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
- SDNode OpNode, PatFrag mem_frag,
- X86MemOperand x86memop, ValueType OpVT> {
- def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
- def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
- (ins x86memop:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (OpVT (OpNode (mem_frag addr:$src1),
- (i8 imm:$src2))))]>, EVEX;
-defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, loadv16i32,
- i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
// AVX-512 Logical Instructions
@@ -3729,14 +3666,14 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
(ins _.RC:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
let mayLoad = 1 in
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))),
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
@@ -3746,7 +3683,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -3836,16 +3773,16 @@ multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>,
- avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>;
+ avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>,
- avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>;
+ avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>, AVX512BIi8Base, EVEX_4V;
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>,
- avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>;
+ avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V;
-defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>;
-defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>;
+defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>, AVX512BIi8Base, EVEX_4V;
+defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
@@ -3865,7 +3802,8 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))),
+ (_.VT (OpNode _.RC:$src1,
+ (_.VT (bitconvert (_.LdFrag addr:$src2))))),
EVEX_CD8<_.EltSize, CD8VF>;
@@ -3927,6 +3865,65 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
+// 1-src variable permutation VPERMW/D/Q
+multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+ let Predicates = [HasAVX512, HasVLX] in
+ defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
+ avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
+multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo> {
+ let Predicates = [HasAVX512] in
+ defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info512>,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info512>, EVEX_V512;
+ let Predicates = [HasAVX512, HasVLX] in
+ defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info256>,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info256>, EVEX_V256;
+defm VPERM : avx512_var_shift_w<0x8D, "vpermw", X86VPermv>;
+defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
+ avx512vl_i32_info>;
+defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
+ avx512vl_i64_info>, VEX_W;
+defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
+ avx512vl_f32_info>;
+defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
+ avx512vl_f64_info>, VEX_W;
+defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
+ X86VPermi, avx512vl_i64_info>,
+ EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
+ X86VPermi, avx512vl_f64_info>,
+ EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
+ X86PShufd, avx512vl_i32_info>,
+ EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
+defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
+ X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W;
+defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
+ X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W;
// AVX-512 - MOVDDUP
@@ -4869,11 +4866,6 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>;
- defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src), OpcodeStr,
- "{sae}, $src", "$src, {sae}",
- (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.FloatVT
@@ -4881,24 +4873,58 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+ (ins _.MemOp:$src), OpcodeStr,
+ "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))),
+multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ SDNode OpNode> {
+ defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr,
+ "{sae}, $src", "$src, {sae}",
+ (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
- EVEX_CD8<32, CD8VF>;
+ avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
+ T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
- VEX_W, EVEX_CD8<32, CD8VF>;
+ avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
+ T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ // Define only if AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode>,
+ EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode>,
+ EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode>,
+ EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode>,
+ EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
+ }
let Predicates = [HasERI], hasSideEffects = 0 in {
- defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD;
- defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD;
- defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD;
+ defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX;
+ defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX;
+ defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX;
+defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>,
+ avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX;
+multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
+ SDNode OpNodeRnd, X86VectorVTInfo _>{
+ defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
+ (_.VT (OpNodeRnd _.RC:$src, (i32 imm:$rc)))>,
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
@@ -5007,20 +5033,22 @@ multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
-defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>;
+multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
+ SDNode OpNodeRnd> {
+ defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), OpNodeRnd,
+ v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), OpNodeRnd,
+ v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>,
+ avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>;
defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
let Predicates = [HasAVX512] in {
- def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
- (VSQRTPSZr VR512:$src1)>;
- def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
- (VSQRTPDZr VR512:$src1)>;
def : Pat<(f32 (fsqrt FR32X:$src)),
(VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
def : Pat<(f32 (fsqrt (load addr:$src))),
@@ -5583,30 +5611,6 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1,
(loadv8i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
-multiclass avx512_valign<X86VectorVTInfo _> {
- defm rri : AVX512_maskable<0x03, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
- "valign"##_.Suffix,
- "$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
- (i8 imm:$src3)))>,
- AVX512AIi8Base, EVEX_4V;
- // Also match valign of packed floats.
- def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
- (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;
- let mayLoad = 1 in
- def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
- !strconcat("valign"##_.Suffix,
- "\t{$src3, $src2, $src1, $dst|"
- "$dst, $src1, $src2, $src3}"),
- []>, EVEX_4V;
-defm VALIGND : avx512_valign<v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VALIGNQ : avx512_valign<v8i64_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Helper fragments to match sext vXi1 to vXiY.
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
@@ -5949,7 +5953,7 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
(_.LdFrag addr:$src))),
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
let mayLoad = 1 in
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src),
@@ -5958,7 +5962,6 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
(_.VT (bitconvert (_.LdFrag addr:$src))),
EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>;
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
@@ -5979,3 +5982,212 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>,
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
+//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
+// op(reg_vec2,mem_vec,imm)
+// op(reg_vec2,broadcast(eltVt),imm)
+//all instruction created with FROUND_CURRENT
+multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _>{
+ defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (i8 imm:$src3),
+ let mayLoad = 1 in {
+ defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ (i8 imm:$src3),
+ defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr##", $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
+ (i8 imm:$src3),
+ }
+//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
+// op(reg_vec2,mem_vec,imm)
+// op(reg_vec2,broadcast(eltVt),imm)
+multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _>{
+ defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (i8 imm:$src3))>;
+ let mayLoad = 1 in {
+ defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ (i8 imm:$src3))>;
+ defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr##", $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
+ (i8 imm:$src3))>, EVEX_B;
+ }
+//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
+// op(reg_vec2,mem_scalar,imm)
+//all instruction created with FROUND_CURRENT
+multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (i8 imm:$src3),
+ let mayLoad = 1 in {
+ defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector
+ (_.ScalarLdFrag addr:$src2))),
+ (i8 imm:$src3),
+ let isAsmParserOnly = 1 in {
+ defm rmi_alt :AVX512_maskable_in_asm<opc, MRMSrcMem, _, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ []>;
+ }
+ }
+//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
+multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, X86VectorVTInfo _>{
+ defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+ OpcodeStr, "$src3,{sae}, $src2, $src1",
+ "$src1, $src2,{sae}, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (i8 imm:$src3),
+ (i32 FROUND_NO_EXC))>, EVEX_B;
+//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
+multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, X86VectorVTInfo _> {
+ defm NAME: avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _>;
+multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
+ AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
+ let Predicates = [prd] in {
+ defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
+ EVEX_V512;
+ }
+ let Predicates = [prd, HasVLX] in {
+ defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
+ EVEX_V128;
+ defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
+ EVEX_V256;
+ }
+multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
+ bits<8> opc, SDNode OpNode>{
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+ }
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
+ defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
+ }
+multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
+ X86VectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
+ let Predicates = [prd] in {
+ defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, _>,
+ avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNode, _>;
+ }
+defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd",
+ avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VFIXUPIMMPS : avx512_common_fp_sae_packed_imm<"vfixupimmps",
+ avx512vl_f32_info, 0x54, X86VFixupimm, HasAVX512>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+defm VFIXUPIMMSD: avx512_common_fp_sae_scalar_imm<"vfixupimmsd", f64x_info,
+ 0x55, X86VFixupimm, HasAVX512>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info,
+ 0x55, X86VFixupimm, HasAVX512>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
+ 0x50, X86VRange, HasDQI>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
+ 0x50, X86VRange, HasDQI>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info,
+ 0x51, X86VRange, HasDQI>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
+ 0x51, X86VRange, HasDQI>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
+ bits<8> opc, SDNode OpNode = X86Shuf128>{
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+ }
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
+ }
+defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
+ AVX512VLVectorVTInfo VTInfo_FP>{
+ defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign>,
+ AVX512AIi8Base, EVEX_4V;
+ let isCodeGenOnly = 1 in {
+ defm NAME#_FP: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0x03, X86VAlign>,
+ AVX512AIi8Base, EVEX_4V;
+ }
+defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>,
+ EVEX_CD8<32, CD8VF>;
+defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>,
+ EVEX_CD8<64, CD8VF>, VEX_W;
diff --git a/lib/Target/X86/ b/lib/Target/X86/
index 331faf2..e2fa295 100644
--- a/lib/Target/X86/
+++ b/lib/Target/X86/
@@ -764,6 +764,14 @@ class AVX512BIi8Base : PD {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
+class AVX512XSIi8Base : XS {
+ Domain ExeDomain = SSEPackedInt;
+ ImmType ImmT = Imm8;
+class AVX512XDIi8Base : XD {
+ Domain ExeDomain = SSEPackedInt;
+ ImmType ImmT = Imm8;
class AVX512PSIi8Base : PS {
Domain ExeDomain = SSEPackedSingle;
ImmType ImmT = Imm8;
diff --git a/lib/Target/X86/ b/lib/Target/X86/
index 79d213c..dfe58ef 100644
--- a/lib/Target/X86/
+++ b/lib/Target/X86/
@@ -35,8 +35,6 @@ def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;
// SSE specific DAG Nodes.
-def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
- SDTCisFP<0>, SDTCisInt<2> ]>;
def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisFP<1>, SDTCisVT<3, i8>,
@@ -65,7 +63,6 @@ def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
-def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
@@ -78,6 +75,9 @@ def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+def X86psadbw : SDNode<"X86ISD::PSADBW",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
@@ -219,6 +219,8 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>]>;
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
@@ -229,6 +231,9 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>;
+def SDTFPUnaryOpRound : SDTypeProfile<1, 2, [ // fsqrt_round, fgetexp_round, etc.
+ SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]>;
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
@@ -247,7 +252,8 @@ def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
-def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>;
+def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>;
+def X86Shuf128 : SDNode<"X86ISD::SHUF128", SDTShuff3OpI>;
def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
@@ -279,6 +285,9 @@ def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
+def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>;
+def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>;
def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisSubVecOfVec<1, 0>]>, []>;
@@ -298,6 +307,8 @@ def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
+def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
+def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 43decf7..6b7a929 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -433,6 +433,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
+ { X86::BSF16rr, X86::BSF16rm, 0 },
+ { X86::BSF32rr, X86::BSF32rm, 0 },
+ { X86::BSF64rr, X86::BSF64rm, 0 },
+ { X86::BSR16rr, X86::BSR16rm, 0 },
+ { X86::BSR32rr, X86::BSR32rm, 0 },
+ { X86::BSR64rr, X86::BSR64rm, 0 },
{ X86::CMP16rr, X86::CMP16rm, 0 },
{ X86::CMP32rr, X86::CMP32rm, 0 },
{ X86::CMP64rr, X86::CMP64rm, 0 },
@@ -1690,8 +1696,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSUBQZrr, X86::VPSUBQZrm, 0 },
{ X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
{ X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
- { X86::VALIGNQrri, X86::VALIGNQrmi, 0 },
- { X86::VALIGNDrri, X86::VALIGNDrmi, 0 },
+ { X86::VALIGNQZrri, X86::VALIGNQZrmi, 0 },
+ { X86::VALIGNDZrri, X86::VALIGNDZrmi, 0 },
{ X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
@@ -4697,8 +4703,17 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
return false;
+static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs) {
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
ArrayRef<MachineOperand> MOs,
+ MachineBasicBlock::iterator InsertPt,
MachineInstr *MI,
const TargetInstrInfo &TII) {
// Create the base instruction with the memory operand as the first part.
@@ -4706,11 +4721,7 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
MI->getDebugLoc(), true);
MachineInstrBuilder MIB(MF, NewMI);
- unsigned NumAddrOps = MOs.size();
- for (unsigned i = 0; i != NumAddrOps; ++i)
- MIB.addOperand(MOs[i]);
- if (NumAddrOps < 4) // FrameIndex only
- addOffset(MIB, 0);
+ addOperands(MIB, MOs);
// Loop over the rest of the ri operands, converting them over.
unsigned NumOps = MI->getDesc().getNumOperands()-2;
@@ -4722,11 +4733,16 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
MachineOperand &MO = MI->getOperand(i);
+ MachineBasicBlock *MBB = InsertPt->getParent();
+ MBB->insert(InsertPt, NewMI);
return MIB;
static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
unsigned OpNo, ArrayRef<MachineOperand> MOs,
+ MachineBasicBlock::iterator InsertPt,
MachineInstr *MI, const TargetInstrInfo &TII) {
// Omit the implicit operands, something BuildMI can't do.
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
@@ -4737,38 +4753,32 @@ static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
MachineOperand &MO = MI->getOperand(i);
if (i == OpNo) {
assert(MO.isReg() && "Expected to fold into reg operand!");
- unsigned NumAddrOps = MOs.size();
- for (unsigned i = 0; i != NumAddrOps; ++i)
- MIB.addOperand(MOs[i]);
- if (NumAddrOps < 4) // FrameIndex only
- addOffset(MIB, 0);
+ addOperands(MIB, MOs);
} else {
+ MachineBasicBlock *MBB = InsertPt->getParent();
+ MBB->insert(InsertPt, NewMI);
return MIB;
static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
ArrayRef<MachineOperand> MOs,
+ MachineBasicBlock::iterator InsertPt,
MachineInstr *MI) {
- MachineFunction &MF = *MI->getParent()->getParent();
- MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));
- unsigned NumAddrOps = MOs.size();
- for (unsigned i = 0; i != NumAddrOps; ++i)
- MIB.addOperand(MOs[i]);
- if (NumAddrOps < 4) // FrameIndex only
- addOffset(MIB, 0);
+ MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
+ MI->getDebugLoc(), TII.get(Opcode));
+ addOperands(MIB, MOs);
return MIB.addImm(0);
-MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- unsigned OpNum,
- ArrayRef<MachineOperand> MOs,
- unsigned Size, unsigned Align,
- bool AllowCommute) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, unsigned OpNum,
+ ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
+ unsigned Size, unsigned Align, bool AllowCommute) const {
const DenseMap<unsigned,
std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
bool isCallRegIndirect = Subtarget.callRegIndirect();
@@ -4802,7 +4812,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
isTwoAddrFold = true;
} else if (OpNum == 0) {
if (MI->getOpcode() == X86::MOV32r0) {
- NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+ NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI);
if (NewMI)
return NewMI;
@@ -4847,9 +4857,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
if (isTwoAddrFold)
- NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
+ NewMI = FuseTwoAddrInst(MF, Opcode, MOs, InsertPt, MI, *this);
- NewMI = FuseInst(MF, Opcode, OpNum, MOs, MI, *this);
+ NewMI = FuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this);
if (NarrowToMOV32rm) {
// If this is the special case where we use a MOV32rm to load a 32-bit
@@ -4901,8 +4911,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Attempt to fold with the commuted version of the instruction.
unsigned CommuteOp =
(CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1);
- NewMI = foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, Size, Align,
- /*AllowCommute=*/false);
+ NewMI =
+ foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, InsertPt, Size, Align,
+ /*AllowCommute=*/false);
if (NewMI)
return NewMI;
@@ -5131,10 +5142,9 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
MI->addRegisterKilled(Reg, TRI, true);
-MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- int FrameIndex) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
// Check switch flag
if (NoFusing) return nullptr;
@@ -5173,8 +5183,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return nullptr;
return foldMemoryOperandImpl(MF, MI, Ops[0],
- MachineOperand::CreateFI(FrameIndex), Size,
- Alignment, /*AllowCommute=*/true);
+ MachineOperand::CreateFI(FrameIndex), InsertPt,
+ Size, Alignment, /*AllowCommute=*/true);
static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
@@ -5196,17 +5206,16 @@ static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
return false;
-MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- MachineInstr *LoadMI) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const {
// If loading from a FrameIndex, fold directly from the FrameIndex.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
int FrameIndex;
if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
if (isPartialRegisterLoad(*LoadMI, MF))
return nullptr;
- return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
+ return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex);
// Check switch flag
@@ -5326,7 +5335,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt,
/*Size=*/0, Alignment, /*AllowCommute=*/true);
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 0dd8101..ac1b2d4 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -307,6 +307,7 @@ public:
/// references has been changed.
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
int FrameIndex) const override;
/// foldMemoryOperand - Same as the previous version except it allows folding
@@ -314,6 +315,7 @@ public:
/// stack slot.
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
MachineInstr *LoadMI) const override;
/// canFoldMemoryOperand - Returns true if the specified load / store is
@@ -407,6 +409,7 @@ public:
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned OpNum,
ArrayRef<MachineOperand> MOs,
+ MachineBasicBlock::iterator InsertPt,
unsigned Size, unsigned Alignment,
bool AllowCommute) const;
diff --git a/lib/Target/X86/ b/lib/Target/X86/
index 70c2027520..e936b4b 100644
--- a/lib/Target/X86/
+++ b/lib/Target/X86/
@@ -788,6 +788,7 @@ def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
+def HasMPX : Predicate<"Subtarget->hasMPX()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
@@ -2456,6 +2457,9 @@ include ""
include ""
include ""
+// MPX instructions
+include ""
include ""
include ""
diff --git a/lib/Target/X86/ b/lib/Target/X86/
new file mode 100644
index 0000000..cf5e2e3
--- /dev/null
+++ b/lib/Target/X86/
@@ -0,0 +1,70 @@
+//===-- - MPX Instruction Set ---------*- tablegen -*-===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file describes the X86 MPX instruction set, defining the
+// instructions, and properties of the instructions which are needed for code
+// generation, machine code emission, and analysis.
+multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
+ def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins i32mem:$src),
+ OpcodeStr#" \t{$src, $dst|$dst, $src}", []>,
+ Requires<[HasMPX, Not64BitMode]>;
+ def 64rm: RI<opc, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
+ OpcodeStr#" \t{$src, $dst|$dst, $src}", []>,
+ Requires<[HasMPX, In64BitMode]>;
+defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
+multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> {
+ def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i32mem:$src2),
+ OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ Requires<[HasMPX, Not64BitMode]>;
+ def 64rm: RI<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i64mem:$src2),
+ OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ Requires<[HasMPX, In64BitMode]>;
+ def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2),
+ OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ Requires<[HasMPX, Not64BitMode]>;
+ def 64rr: RI<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2),
+ OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ Requires<[HasMPX, In64BitMode]>;
+defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS;
+defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD;
+defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD;
+def BNDMOVRMrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src),
+ "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ Requires<[HasMPX]>;
+def BNDMOVRM32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
+ "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ Requires<[HasMPX, Not64BitMode]>;
+def BNDMOVRM64rm : RI<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src),
+ "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ Requires<[HasMPX, In64BitMode]>;
+def BNDMOVMRrr : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src),
+ "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ Requires<[HasMPX]>;
+def BNDMOVMR32mr : I<0x1B, MRMDestMem, (outs i64mem:$dst), (ins BNDR:$src),
+ "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ Requires<[HasMPX, Not64BitMode]>;
+def BNDMOVMR64mr : RI<0x1B, MRMDestMem, (outs i128mem:$dst), (ins BNDR:$src),
+ "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ Requires<[HasMPX, In64BitMode]>;
+def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
+ "bndstx \t{$src, $dst|$dst, $src}", []>, TB,
+ Requires<[HasMPX]>;
+def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
+ "bndldx \t{$src, $dst|$dst, $src}", []>, TB,
+ Requires<[HasMPX]>;
\ No newline at end of file
diff --git a/lib/Target/X86/ b/lib/Target/X86/
index d3b401e..8294e38 100644
--- a/lib/Target/X86/
+++ b/lib/Target/X86/
@@ -3560,7 +3560,7 @@ multiclass scalar_unary_math_patterns<Intrinsic Intr, string OpcPrefix,
let Predicates = [HasAVX] in {
def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
(!cast<I>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
def : Pat<(VT (X86Blendi VT:$dst, (Intr VT:$src), (i8 1))),
(!cast<I>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
@@ -4053,6 +4053,20 @@ defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
int_x86_avx2_psad_bw, SSE_PMADD, 1>;
+let Predicates = [HasAVX2] in
+ def : Pat<(v32i8 (X86psadbw (v32i8 VR256:$src1),
+ (v32i8 VR256:$src2))),
+ (VPSADBWYrr VR256:$src2, VR256:$src1)>;
+let Predicates = [HasAVX] in
+ def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1),
+ (v16i8 VR128:$src2))),
+ (VPSADBWrr VR128:$src2, VR128:$src1)>;
+def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1),
+ (v16i8 VR128:$src2))),
+ (PSADBWrr VR128:$src2, VR128:$src1)>;
let Predicates = [HasAVX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
@@ -4207,16 +4221,6 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
} // Constraints = "$src1 = $dst"
-let Predicates = [HasAVX] in {
- def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
- (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
-let Predicates = [UseSSE2] in {
- def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
- (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
// SSE2 - Packed Integer Comparison Instructions
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 4af514a..0268066 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -21,7 +21,8 @@ enum IntrinsicType {
@@ -339,9 +340,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG,
@@ -366,6 +367,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
+ X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_getexp_pd_512, INTR_TYPE_1OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
@@ -559,6 +572,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
X86_INTRINSIC_DATA(avx512_mask_sub_pd_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
@@ -583,6 +604,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 556b518..ff1436a 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -128,6 +128,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
const DataLayout *DL = TM.getDataLayout();
assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference");
+ MCSymbol *Sym = nullptr;
SmallString<128> Name;
StringRef Suffix;
@@ -160,12 +161,14 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
getMang()->getNameWithPrefix(Name, MO.getSymbolName());
} else if (MO.isMBB()) {
- Name += MO.getMBB()->getSymbol()->getName();
+ assert(Suffix.empty());
+ Sym = MO.getMBB()->getSymbol();
unsigned OrigLen = Name.size() - PrefixLen;
Name += Suffix;
- MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
+ if (!Sym)
+ Sym = Ctx.getOrCreateSymbol(Name);
StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen);
@@ -240,10 +243,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
// Subtract the pic base.
- Expr = MCBinaryExpr::CreateSub(Expr,
- MCSymbolRefExpr::Create(MF.getPICBaseSymbol(),
+ Expr = MCBinaryExpr::createSub(Expr,
+ MCSymbolRefExpr::create(MF.getPICBaseSymbol(),
@@ -264,10 +267,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
- Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, Ctx);
// Subtract the pic base.
- Expr = MCBinaryExpr::CreateSub(Expr,
- MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx),
+ Expr = MCBinaryExpr::createSub(Expr,
+ MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx),
if (MO.isJTI()) {
@@ -277,17 +280,17 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
// section so we are restricting it to jumptable references.
MCSymbol *Label = Ctx.createTempSymbol();
AsmPrinter.OutStreamer->EmitAssignment(Label, Expr);
- Expr = MCSymbolRefExpr::Create(Label, Ctx);
+ Expr = MCSymbolRefExpr::create(Label, Ctx);
if (!Expr)
- Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
+ Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
- Expr = MCBinaryExpr::CreateAdd(Expr,
- MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Expr = MCBinaryExpr::createAdd(Expr,
+ MCConstantExpr::create(MO.getOffset(), Ctx),
return MCOperand::createExpr(Expr);
@@ -710,7 +713,7 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
- const MCSymbolRefExpr *symRef = MCSymbolRefExpr::Create(sym, SRVK, context);
+ const MCSymbolRefExpr *symRef = MCSymbolRefExpr::create(sym, SRVK, context);
if (is64Bits) {
@@ -749,7 +752,7 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
MCSymbol *tlsGetAddr = context.getOrCreateSymbol(name);
const MCSymbolRefExpr *tlsRef =
- MCSymbolRefExpr::Create(tlsGetAddr,
+ MCSymbolRefExpr::create(tlsGetAddr,
@@ -1071,7 +1074,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// FIXME: We would like an efficient form for this, so we don't have to do a
// lot of extra uniquing.
- .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
+ .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
// Emit the label.
@@ -1100,12 +1103,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Now that we have emitted the label, lower the complex operand expression.
MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
- const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+ const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
const MCExpr *PICBase =
- MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext);
- DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext);
+ MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
+ DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
- DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
+ DotExpr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(OpSym,OutContext),
DotExpr, OutContext);
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 1f36163..e9b6bfc 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -175,12 +175,12 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
return &X86::GR64_NOSPRegClass;
return &X86::GR32_NOSPRegClass;
case 2: // Available for tailcall (not callee-saved GPRs).
- if (IsWin64)
+ const Function *F = MF.getFunction();
+ if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64))
return &X86::GR64_TCW64RegClass;
else if (Is64Bit)
return &X86::GR64_TCRegClass;
- const Function *F = MF.getFunction();
bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false);
if (hasHipeCC)
return &X86::GR32RegClass;
diff --git a/lib/Target/X86/ b/lib/Target/X86/
index 2e735fa..cdb151c 100644
--- a/lib/Target/X86/
+++ b/lib/Target/X86/
@@ -302,6 +302,11 @@ def CR15 : X86Reg<"cr15", 15>;
def EIZ : X86Reg<"eiz", 4>;
def RIZ : X86Reg<"riz", 4>;
+// Bound registers, used in MPX instructions
+def BND0 : X86Reg<"bnd0", 0>;
+def BND1 : X86Reg<"bnd1", 1>;
+def BND2 : X86Reg<"bnd2", 2>;
+def BND3 : X86Reg<"bnd3", 3>;
// Register Class Definitions... now that we have all of the pieces, define the
@@ -484,3 +489,6 @@ def VK8WM : RegisterClass<"X86", [v8i1], 8, (sub VK8, K0)> {let Size = 8;}
def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
+// Bound registers
+def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;
\ No newline at end of file
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 1cdab14..74af29f 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -259,6 +259,7 @@ void X86Subtarget::initializeEnvironment() {
HasSHA = false;
HasPRFCHW = false;
HasRDSEED = false;
+ HasMPX = false;
IsBTMemSlow = false;
IsSHLDSlow = false;
IsUAMemFast = false;
@@ -273,8 +274,6 @@ void X86Subtarget::initializeEnvironment() {
LEAUsesAG = false;
SlowLEA = false;
SlowIncDec = false;
- UseSqrtEst = false;
- UseReciprocalEst = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 455dd77..a476f7a 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -190,16 +190,6 @@ protected:
/// True if INC and DEC instructions are slow when writing to flags
bool SlowIncDec;
- /// Use the RSQRT* instructions to optimize square root calculations.
- /// For this to be profitable, the cost of FSQRT and FDIV must be
- /// substantially higher than normal FP ops like FADD and FMUL.
- bool UseSqrtEst;
- /// Use the RCP* instructions to optimize FP division calculations.
- /// For this to be profitable, the cost of FDIV must be
- /// substantially higher than normal FP ops like FADD and FMUL.
- bool UseReciprocalEst;
/// Processor has AVX-512 PreFetch Instructions
bool HasPFI;
@@ -218,6 +208,9 @@ protected:
/// Processor has AVX-512 Vector Length eXtenstions
bool HasVLX;
+ /// Processor supports MPX - Memory Protection Extensions
+ bool HasMPX;
/// Use software floating point for code generation.
bool UseSoftFloat;
@@ -377,14 +370,13 @@ public:
bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }
bool slowIncDec() const { return SlowIncDec; }
- bool useSqrtEst() const { return UseSqrtEst; }
- bool useReciprocalEst() const { return UseReciprocalEst; }
bool hasCDI() const { return HasCDI; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
bool hasDQI() const { return HasDQI; }
bool hasBWI() const { return HasBWI; }
bool hasVLX() const { return HasVLX; }
+ bool hasMPX() const { return HasMPX; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 3e5f1d8..646cff7 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -105,6 +105,13 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU,
if (Subtarget.isTargetWin64())
this->Options.TrapUnreachable = true;
+ // TODO: By default, all reciprocal estimate operations are off because
+ // that matches the behavior before TargetRecip was added (except for btver2
+ // which used subtarget features to enable this type of codegen).
+ // We should change this to match GCC behavior where everything but
+ // scalar division estimates are turned on by default with -ffast-math.
+ this->Options.Reciprocals.setDefaults("all", false, 1);
@@ -221,9 +228,9 @@ bool X86PassConfig::addILPOpts() {
bool X86PassConfig::addPreISel() {
- // Only add this pass for 32-bit x86.
+ // Only add this pass for 32-bit x86 Windows.
Triple TT(TM->getTargetTriple());
- if (TT.getArch() == Triple::x86)
+ if (TT.isOSWindows() && TT.getArch() == Triple::x86)
return true;
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 6bf45c3..f9f6290 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -32,9 +32,9 @@ const MCExpr *X86_64MachoTargetObjectFile::getTTypeGlobalReference(
if ((Encoding & DW_EH_PE_indirect) && (Encoding & DW_EH_PE_pcrel)) {
const MCSymbol *Sym = TM.getSymbol(GV, Mang);
const MCExpr *Res =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
- const MCExpr *Four = MCConstantExpr::Create(4, getContext());
- return MCBinaryExpr::CreateAdd(Res, Four, getContext());
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
+ const MCExpr *Four = MCConstantExpr::create(4, getContext());
+ return MCBinaryExpr::createAdd(Res, Four, getContext());
return TargetLoweringObjectFileMachO::getTTypeGlobalReference(
@@ -55,14 +55,14 @@ const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
// foo@GOTPCREL+4+<offset>.
unsigned FinalOff = Offset+MV.getConstant()+4;
const MCExpr *Res =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
- const MCExpr *Off = MCConstantExpr::Create(FinalOff, getContext());
- return MCBinaryExpr::CreateAdd(Res, Off, getContext());
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
+ const MCExpr *Off = MCConstantExpr::create(FinalOff, getContext());
+ return MCBinaryExpr::createAdd(Res, Off, getContext());
const MCExpr *X86ELFTargetObjectFile::getDebugThreadLocalSymbol(
const MCSymbol *Sym) const {
- return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext());
+ return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext());
@@ -116,7 +116,7 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
if (GOLHS->isThreadLocal())
return nullptr;
- return MCSymbolRefExpr::Create(TM.getSymbol(GOLHS, Mang),
+ return MCSymbolRefExpr::create(TM.getSymbol(GOLHS, Mang),
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index 4efaada..ce69ea7 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp
@@ -16,6 +16,7 @@
#include "X86.h"
#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Dominators.h"
@@ -59,30 +60,49 @@ public:
void emitExceptionRegistrationRecord(Function *F);
- void linkExceptionRegistration(IRBuilder<> &Builder, Value *RegNode,
- Value *Handler);
- void unlinkExceptionRegistration(IRBuilder<> &Builder, Value *RegNode);
+ void linkExceptionRegistration(IRBuilder<> &Builder, Value *Handler);
+ void unlinkExceptionRegistration(IRBuilder<> &Builder);
+ void addCXXStateStores(Function &F, MachineModuleInfo &MMI);
+ void addCXXStateStoresToFunclet(Value *ParentRegNode, WinEHFuncInfo &FuncInfo,
+ Function &F, int BaseState);
+ void insertStateNumberStore(Value *ParentRegNode, Instruction *IP, int State);
Value *emitEHLSDA(IRBuilder<> &Builder, Function *F);
Function *generateLSDAInEAXThunk(Function *ParentFunc);
+ int escapeRegNode(Function &F);
// Module-level type getters.
- Type *getEHRegistrationType();
- Type *getSEH3RegistrationType();
- Type *getSEH4RegistrationType();
- Type *getCXXEH3RegistrationType();
+ Type *getEHLinkRegistrationType();
+ Type *getSEHRegistrationType();
+ Type *getCXXEHRegistrationType();
// Per-module data.
Module *TheModule = nullptr;
- StructType *EHRegistrationTy = nullptr;
- StructType *CXXEH3RegistrationTy = nullptr;
- StructType *SEH3RegistrationTy = nullptr;
- StructType *SEH4RegistrationTy = nullptr;
+ StructType *EHLinkRegistrationTy = nullptr;
+ StructType *CXXEHRegistrationTy = nullptr;
+ StructType *SEHRegistrationTy = nullptr;
+ Function *FrameRecover = nullptr;
+ Function *FrameAddress = nullptr;
+ Function *FrameEscape = nullptr;
// Per-function state
EHPersonality Personality = EHPersonality::Unknown;
Function *PersonalityFn = nullptr;
+ /// The stack allocation containing all EH data, including the link in the
+ /// fs:00 chain and the current state.
+ AllocaInst *RegNode = nullptr;
+ /// Struct type of RegNode. Used for GEPing.
+ Type *RegNodeTy = nullptr;
+ /// The index of the state field of RegNode.
+ int StateFieldIndex = ~0U;
+ /// The linked list node subobject inside of RegNode.
+ Value *Link = nullptr;
@@ -92,16 +112,21 @@ char WinEHStatePass::ID = 0;
bool WinEHStatePass::doInitialization(Module &M) {
TheModule = &M;
+ FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::frameescape);
+ FrameRecover = Intrinsic::getDeclaration(TheModule, Intrinsic::framerecover);
+ FrameAddress = Intrinsic::getDeclaration(TheModule, Intrinsic::frameaddress);
return false;
bool WinEHStatePass::doFinalization(Module &M) {
assert(TheModule == &M);
TheModule = nullptr;
- EHRegistrationTy = nullptr;
- CXXEH3RegistrationTy = nullptr;
- SEH3RegistrationTy = nullptr;
- SEH4RegistrationTy = nullptr;
+ EHLinkRegistrationTy = nullptr;
+ CXXEHRegistrationTy = nullptr;
+ SEHRegistrationTy = nullptr;
+ FrameEscape = nullptr;
+ FrameRecover = nullptr;
+ FrameAddress = nullptr;
return false;
@@ -136,8 +161,19 @@ bool WinEHStatePass::runOnFunction(Function &F) {
if (!isMSVCEHPersonality(Personality))
return false;
+ // Disable frame pointer elimination in this function.
+ // FIXME: Do the nested handlers need to keep the parent ebp in ebp, or can we
+ // use an arbitrary register?
+ F.addFnAttr("no-frame-pointer-elim", "true");
- // FIXME: State insertion.
+ auto *MMIPtr = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMIPtr && "MachineModuleInfo should always be available");
+ MachineModuleInfo &MMI = *MMIPtr;
+ if (Personality == EHPersonality::MSVC_CXX) {
+ addCXXStateStores(F, MMI);
+ }
// Reset per-function state.
PersonalityFn = nullptr;
@@ -152,17 +188,17 @@ bool WinEHStatePass::runOnFunction(Function &F) {
/// EHRegistrationNode *Next;
/// };
-Type *WinEHStatePass::getEHRegistrationType() {
- if (EHRegistrationTy)
- return EHRegistrationTy;
+Type *WinEHStatePass::getEHLinkRegistrationType() {
+ if (EHLinkRegistrationTy)
+ return EHLinkRegistrationTy;
LLVMContext &Context = TheModule->getContext();
- EHRegistrationTy = StructType::create(Context, "EHRegistrationNode");
+ EHLinkRegistrationTy = StructType::create(Context, "EHRegistrationNode");
Type *FieldTys[] = {
- EHRegistrationTy->getPointerTo(0), // EHRegistrationNode *Next
+ EHLinkRegistrationTy->getPointerTo(0), // EHRegistrationNode *Next
Type::getInt8PtrTy(Context) // EXCEPTION_DISPOSITION (*Handler)(...)
- EHRegistrationTy->setBody(FieldTys, false);
- return EHRegistrationTy;
+ EHLinkRegistrationTy->setBody(FieldTys, false);
+ return EHLinkRegistrationTy;
/// The __CxxFrameHandler3 registration node:
@@ -171,40 +207,21 @@ Type *WinEHStatePass::getEHRegistrationType() {
/// EHRegistrationNode SubRecord;
/// int32_t TryLevel;
/// };
-Type *WinEHStatePass::getCXXEH3RegistrationType() {
- if (CXXEH3RegistrationTy)
- return CXXEH3RegistrationTy;
+Type *WinEHStatePass::getCXXEHRegistrationType() {
+ if (CXXEHRegistrationTy)
+ return CXXEHRegistrationTy;
LLVMContext &Context = TheModule->getContext();
Type *FieldTys[] = {
Type::getInt8PtrTy(Context), // void *SavedESP
- getEHRegistrationType(), // EHRegistrationNode SubRecord
+ getEHLinkRegistrationType(), // EHRegistrationNode SubRecord
Type::getInt32Ty(Context) // int32_t TryLevel
- CXXEH3RegistrationTy =
+ CXXEHRegistrationTy =
StructType::create(FieldTys, "CXXExceptionRegistration");
- return CXXEH3RegistrationTy;
-/// The _except_handler3 registration node:
-/// struct EH3ExceptionRegistration {
-/// EHRegistrationNode SubRecord;
-/// void *ScopeTable;
-/// int32_t TryLevel;
-/// };
-Type *WinEHStatePass::getSEH3RegistrationType() {
- if (SEH3RegistrationTy)
- return SEH3RegistrationTy;
- LLVMContext &Context = TheModule->getContext();
- Type *FieldTys[] = {
- getEHRegistrationType(), // EHRegistrationNode SubRecord
- Type::getInt8PtrTy(Context), // void *ScopeTable
- Type::getInt32Ty(Context) // int32_t TryLevel
- };
- SEH3RegistrationTy = StructType::create(FieldTys, "EH3ExceptionRegistration");
- return SEH3RegistrationTy;
+ return CXXEHRegistrationTy;
-/// The _except_handler4 registration node:
+/// The _except_handler3/4 registration node:
/// struct EH4ExceptionRegistration {
/// void *SavedESP;
/// _EXCEPTION_POINTERS *ExceptionPointers;
@@ -212,19 +229,19 @@ Type *WinEHStatePass::getSEH3RegistrationType() {
/// int32_t EncodedScopeTable;
/// int32_t TryLevel;
/// };
-Type *WinEHStatePass::getSEH4RegistrationType() {
- if (SEH4RegistrationTy)
- return SEH4RegistrationTy;
+Type *WinEHStatePass::getSEHRegistrationType() {
+ if (SEHRegistrationTy)
+ return SEHRegistrationTy;
LLVMContext &Context = TheModule->getContext();
Type *FieldTys[] = {
Type::getInt8PtrTy(Context), // void *SavedESP
Type::getInt8PtrTy(Context), // void *ExceptionPointers
- getEHRegistrationType(), // EHRegistrationNode SubRecord
+ getEHLinkRegistrationType(), // EHRegistrationNode SubRecord
Type::getInt32Ty(Context), // int32_t EncodedScopeTable
Type::getInt32Ty(Context) // int32_t TryLevel
- SEH4RegistrationTy = StructType::create(FieldTys, "EH4ExceptionRegistration");
- return SEH4RegistrationTy;
+ SEHRegistrationTy = StructType::create(FieldTys, "SEHExceptionRegistration");
+ return SEHRegistrationTy;
// Emit an exception registration record. These are stack allocations with the
@@ -238,62 +255,63 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
StringRef PersonalityName = PersonalityFn->getName();
IRBuilder<> Builder(&F->getEntryBlock(), F->getEntryBlock().begin());
Type *Int8PtrType = Builder.getInt8PtrTy();
- Value *SubRecord = nullptr;
- if (PersonalityName == "__CxxFrameHandler3") {
- Type *RegNodeTy = getCXXEH3RegistrationType();
- Value *RegNode = Builder.CreateAlloca(RegNodeTy);
+ if (Personality == EHPersonality::MSVC_CXX) {
+ RegNodeTy = getCXXEHRegistrationType();
+ RegNode = Builder.CreateAlloca(RegNodeTy);
// FIXME: We can skip this in -GS- mode, when we figure that out.
// SavedESP = llvm.stacksave()
Value *SP = Builder.CreateCall(
Intrinsic::getDeclaration(TheModule, Intrinsic::stacksave), {});
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
// TryLevel = -1
- Builder.CreateStore(Builder.getInt32(-1),
- Builder.CreateStructGEP(RegNodeTy, RegNode, 2));
+ StateFieldIndex = 2;
+ insertStateNumberStore(RegNode, Builder.GetInsertPoint(), -1);
// Handler = __ehhandler$F
Function *Trampoline = generateLSDAInEAXThunk(F);
- SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 1);
- linkExceptionRegistration(Builder, SubRecord, Trampoline);
- } else if (PersonalityName == "_except_handler3") {
- Type *RegNodeTy = getSEH3RegistrationType();
- Value *RegNode = Builder.CreateAlloca(RegNodeTy);
- // TryLevel = -1
- Builder.CreateStore(Builder.getInt32(-1),
- Builder.CreateStructGEP(RegNodeTy, RegNode, 2));
- // ScopeTable = llvm.x86.seh.lsda(F)
- Value *LSDA = emitEHLSDA(Builder, F);
- Builder.CreateStore(LSDA, Builder.CreateStructGEP(RegNodeTy, RegNode, 1));
- SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 0);
- linkExceptionRegistration(Builder, SubRecord, PersonalityFn);
- } else if (PersonalityName == "_except_handler4") {
- Type *RegNodeTy = getSEH4RegistrationType();
- Value *RegNode = Builder.CreateAlloca(RegNodeTy);
+ Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 1);
+ linkExceptionRegistration(Builder, Trampoline);
+ } else if (Personality == EHPersonality::MSVC_X86SEH) {
+ // If _except_handler4 is in use, some additional guard checks and prologue
+ // stuff is required.
+ bool UseStackGuard = (PersonalityName == "_except_handler4");
+ RegNodeTy = getSEHRegistrationType();
+ RegNode = Builder.CreateAlloca(RegNodeTy);
// SavedESP = llvm.stacksave()
Value *SP = Builder.CreateCall(
Intrinsic::getDeclaration(TheModule, Intrinsic::stacksave), {});
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
- // TryLevel = -2
- Builder.CreateStore(Builder.getInt32(-2),
- Builder.CreateStructGEP(RegNodeTy, RegNode, 4));
- // FIXME: XOR the LSDA with __security_cookie.
+ // TryLevel = -2 / -1
+ StateFieldIndex = 4;
+ insertStateNumberStore(RegNode, Builder.GetInsertPoint(),
+ UseStackGuard ? -2 : -1);
// ScopeTable = llvm.x86.seh.lsda(F)
Value *FI8 = Builder.CreateBitCast(F, Int8PtrType);
Value *LSDA = Builder.CreateCall(
Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_lsda), FI8);
- Builder.CreateStore(LSDA, Builder.CreateStructGEP(RegNodeTy, RegNode, 1));
- SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 2);
- linkExceptionRegistration(Builder, SubRecord, PersonalityFn);
+ Type *Int32Ty = Type::getInt32Ty(TheModule->getContext());
+ LSDA = Builder.CreatePtrToInt(LSDA, Int32Ty);
+ // If using _except_handler4, xor the address of the table with
+ // __security_cookie.
+ if (UseStackGuard) {
+ Value *Cookie =
+ TheModule->getOrInsertGlobal("__security_cookie", Int32Ty);
+ Value *Val = Builder.CreateLoad(Int32Ty, Cookie);
+ LSDA = Builder.CreateXor(LSDA, Val);
+ }
+ Builder.CreateStore(LSDA, Builder.CreateStructGEP(RegNodeTy, RegNode, 3));
+ Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 2);
+ linkExceptionRegistration(Builder, PersonalityFn);
} else {
llvm_unreachable("unexpected personality function");
- // FIXME: Insert an unlink before all returns.
+ // Insert an unlink before all returns.
for (BasicBlock &BB : *F) {
TerminatorInst *T = BB.getTerminator();
if (!isa<ReturnInst>(T))
- unlinkExceptionRegistration(Builder, SubRecord);
+ unlinkExceptionRegistration(Builder);
@@ -342,33 +360,122 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
void WinEHStatePass::linkExceptionRegistration(IRBuilder<> &Builder,
- Value *RegNode, Value *Handler) {
- Type *RegNodeTy = getEHRegistrationType();
+ Value *Handler) {
+ Type *LinkTy = getEHLinkRegistrationType();
// Handler = Handler
Handler = Builder.CreateBitCast(Handler, Builder.getInt8PtrTy());
- Builder.CreateStore(Handler, Builder.CreateStructGEP(RegNodeTy, RegNode, 1));
+ Builder.CreateStore(Handler, Builder.CreateStructGEP(LinkTy, Link, 1));
// Next = [fs:00]
Constant *FSZero =
- Constant::getNullValue(RegNodeTy->getPointerTo()->getPointerTo(257));
+ Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257));
Value *Next = Builder.CreateLoad(FSZero);
- Builder.CreateStore(Next, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
- // [fs:00] = RegNode
- Builder.CreateStore(RegNode, FSZero);
+ Builder.CreateStore(Next, Builder.CreateStructGEP(LinkTy, Link, 0));
+ // [fs:00] = Link
+ Builder.CreateStore(Link, FSZero);
-void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder,
- Value *RegNode) {
- // Clone RegNode into the current BB for better address mode folding.
- if (auto *GEP = dyn_cast<GetElementPtrInst>(RegNode)) {
+void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) {
+ // Clone Link into the current BB for better address mode folding.
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Link)) {
GEP = cast<GetElementPtrInst>(GEP->clone());
- RegNode = GEP;
+ Link = GEP;
- Type *RegNodeTy = getEHRegistrationType();
- // [fs:00] = RegNode->Next
+ Type *LinkTy = getEHLinkRegistrationType();
+ // [fs:00] = Link->Next
Value *Next =
- Builder.CreateLoad(Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
+ Builder.CreateLoad(Builder.CreateStructGEP(LinkTy, Link, 0));
Constant *FSZero =
- Constant::getNullValue(RegNodeTy->getPointerTo()->getPointerTo(257));
+ Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257));
Builder.CreateStore(Next, FSZero);
+void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) {
+ WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F);
+ calculateWinCXXEHStateNumbers(&F, FuncInfo);
+ // The base state for the parent is -1.
+ addCXXStateStoresToFunclet(RegNode, FuncInfo, F, -1);
+ // Set up RegNodeEscapeIndex
+ int RegNodeEscapeIndex = escapeRegNode(F);
+ // Only insert stores in catch handlers.
+ Constant *FI8 =
+ ConstantExpr::getBitCast(&F, Type::getInt8PtrTy(TheModule->getContext()));
+ for (auto P : FuncInfo.HandlerBaseState) {
+ Function *Handler = const_cast<Function *>(P.first);
+ int BaseState = P.second;
+ IRBuilder<> Builder(&Handler->getEntryBlock(),
+ Handler->getEntryBlock().begin());
+ // FIXME: Find and reuse such a call if present.
+ Value *ParentFP = Builder.CreateCall(FrameAddress, {Builder.getInt32(1)});
+ Value *RecoveredRegNode = Builder.CreateCall(
+ FrameRecover, {FI8, ParentFP, Builder.getInt32(RegNodeEscapeIndex)});
+ RecoveredRegNode =
+ Builder.CreateBitCast(RecoveredRegNode, RegNodeTy->getPointerTo(0));
+ addCXXStateStoresToFunclet(RecoveredRegNode, FuncInfo, *Handler, BaseState);
+ }
+/// Escape RegNode so that we can access it from child handlers. Find the call
+/// to frameescape, if any, in the entry block and append RegNode to the list
+/// of arguments.
+int WinEHStatePass::escapeRegNode(Function &F) {
+ // Find the call to frameescape and extract its arguments.
+ IntrinsicInst *EscapeCall = nullptr;
+ for (Instruction &I : F.getEntryBlock()) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+ if (II && II->getIntrinsicID() == Intrinsic::frameescape) {
+ EscapeCall = II;
+ break;
+ }
+ }
+ SmallVector<Value *, 8> Args;
+ if (EscapeCall) {
+ auto Ops = EscapeCall->arg_operands();
+ Args.append(Ops.begin(), Ops.end());
+ }
+ Args.push_back(RegNode);
+ // Replace the call (if it exists) with a new one. Otherwise, insert at the
+ // end of the entry block.
+ IRBuilder<> Builder(&F.getEntryBlock(),
+ EscapeCall ? EscapeCall : F.getEntryBlock().end());
+ Builder.CreateCall(FrameEscape, Args);
+ if (EscapeCall)
+ EscapeCall->eraseFromParent();
+ return Args.size() - 1;
+void WinEHStatePass::addCXXStateStoresToFunclet(Value *ParentRegNode,
+ WinEHFuncInfo &FuncInfo,
+ Function &F, int BaseState) {
+ // Iterate all the instructions and emit state number stores.
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ // Possibly throwing call instructions have no actions to take after
+ // an unwind. Ensure they are in the base state (-1 in the parent).
+ if (CI->doesNotThrow())
+ continue;
+ insertStateNumberStore(ParentRegNode, CI, BaseState);
+ } else if (auto *II = dyn_cast<InvokeInst>(&I)) {
+ // Look up the state number of the landingpad this unwinds to.
+ LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst();
+ // FIXME: Why does this assertion fail?
+ //assert(FuncInfo.LandingPadStateMap.count(LPI) && "LP has no state!");
+ int State = FuncInfo.LandingPadStateMap[LPI];
+ insertStateNumberStore(ParentRegNode, II, State);
+ }
+ }
+ }
+void WinEHStatePass::insertStateNumberStore(Value *ParentRegNode,
+ Instruction *IP, int State) {
+ IRBuilder<> Builder(IP);
+ Value *StateField =
+ Builder.CreateStructGEP(RegNodeTy, ParentRegNode, StateFieldIndex);
+ Builder.CreateStore(Builder.getInt32(State), StateField);
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
index 36b3b02..500c84d 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
@@ -45,7 +45,8 @@ printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O) {
report_fatal_error("can't handle InlineJT32");
-static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
+static void printExpr(const MCExpr *Expr, const MCAsmInfo *MAI,
+ raw_ostream &OS) {
int Offset = 0;
const MCSymbolRefExpr *SRE;
@@ -60,7 +61,7 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
assert(SRE->getKind() == MCSymbolRefExpr::VK_None);
- OS << SRE->getSymbol();
+ SRE->getSymbol().print(OS, MAI);
if (Offset) {
if (Offset > 0)
@@ -83,5 +84,5 @@ printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- printExpr(Op.getExpr(), O);
+ printExpr(Op.getExpr(), &MAI, O);
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index f2d2b37..3178a4e 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -8,12 +8,11 @@
#include "XCoreMCAsmInfo.h"
-#include "llvm/ADT/StringRef.h"
using namespace llvm;
void XCoreMCAsmInfo::anchor() { }
-XCoreMCAsmInfo::XCoreMCAsmInfo(StringRef TT) {
+XCoreMCAsmInfo::XCoreMCAsmInfo(const Triple &TT) {
SupportsDebugInformation = true;
Data16bitsDirective = "\t.short\t";
Data32bitsDirective = "\t.long\t";
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
index 26df211..39581e4 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -17,14 +17,14 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class StringRef;
- class Target;
+class Triple;
- class XCoreMCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
- public:
- explicit XCoreMCAsmInfo(StringRef TT);
- };
+class XCoreMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+ explicit XCoreMCAsmInfo(const Triple &TT);
} // namespace llvm
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index ce0d39f..f0e4596 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -54,7 +54,7 @@ static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU,
static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI,
- StringRef TT) {
+ const Triple &TT) {
MCAsmInfo *MAI = new XCoreMCAsmInfo(TT);
// Initial state of the frame pointer is SP.
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 23e24f2..702056d 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -37,7 +37,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -100,7 +100,7 @@ void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
Twine(Sym->getName() + StringRef(".globound")));
OutStreamer->EmitSymbolAttribute(SymGlob, MCSA_Global);
- MCConstantExpr::Create(ATy->getNumElements(),
+ MCConstantExpr::create(ATy->getNumElements(),
if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
GV->hasCommonLinkage()) {
@@ -157,7 +157,8 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
unsigned Size = TD->getTypeAllocSize(C->getType());
if (MAI->hasDotTypeDotSizeDirective()) {
OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
- OutStreamer->EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
+ OutStreamer->emitELFSize(cast<MCSymbolELF>(GVSym),
+ MCConstantExpr::create(Size, OutContext));
@@ -201,7 +202,7 @@ printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
MachineBasicBlock *MBB = JTBBs[i];
if (i > 0)
O << ",";
- O << *MBB->getSymbol();
+ MBB->getSymbol()->print(O, MAI);
@@ -217,17 +218,17 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
O << MO.getImm();
case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
+ MO.getMBB()->getSymbol()->print(O, MAI);
case MachineOperand::MO_GlobalAddress:
- O << *getSymbol(MO.getGlobal());
+ getSymbol(MO.getGlobal())->print(O, MAI);
case MachineOperand::MO_ConstantPoolIndex:
O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
<< '_' << MO.getIndex();
case MachineOperand::MO_BlockAddress:
- O << *GetBlockAddressSymbol(MO.getBlockAddress());
+ GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI);
llvm_unreachable("not implemented");
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index f56caec..aa71241 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1926,7 +1926,8 @@ static inline bool isImmUs4(int64_t val)
/// by AM is legal for this target, for a load/store of the specified type.
XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
if (Ty->getTypeID() == Type::VoidTyID)
return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 22014ed..97f0494 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -120,7 +120,8 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
const TargetMachine &TM;
diff --git a/lib/Target/XCore/XCoreMCInstLower.cpp b/lib/Target/XCore/XCoreMCInstLower.cpp
index cffba5f..03c5fa2 100644
--- a/lib/Target/XCore/XCoreMCInstLower.cpp
+++ b/lib/Target/XCore/XCoreMCInstLower.cpp
@@ -65,7 +65,7 @@ MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
llvm_unreachable("<unknown operand type>");
- const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx);
+ const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Symbol, Kind, *Ctx);
if (!Offset)
return MCOperand::createExpr(MCSym);
@@ -73,8 +73,8 @@ MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
// Assume offset is never negative.
assert(Offset > 0);
- const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx);
- const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
+ const MCConstantExpr *OffsetExpr = MCConstantExpr::create(Offset, *Ctx);
+ const MCBinaryExpr *Add = MCBinaryExpr::createAdd(MCSym, OffsetExpr, *Ctx);
return MCOperand::createExpr(Add);
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 7b7672d..c7c57ab 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -553,7 +553,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
LoadInst *Load = Loads[i];
BasicBlock *BB = Load->getParent();
- AliasAnalysis::Location Loc = AA.getLocation(Load);
+ AliasAnalysis::Location Loc = MemoryLocation::get(Load);
if (AA.canInstructionRangeModRef(BB->front(), *Load, Loc,
return false; // Pointer is invalidated!
@@ -774,7 +774,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
Value *Idx = GetElementPtrInst::Create(
- STy, *AI, Idxs, (*AI)->getName() + "." + utostr(i), Call);
+ STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
// TODO: Tell AA about the new values?
Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 92e384a..ef8f42f 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -232,20 +232,20 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
} else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
// Ignore non-volatile loads from local memory. (Atomic is okay here.)
if (!LI->isVolatile()) {
- AliasAnalysis::Location Loc = AA->getLocation(LI);
+ AliasAnalysis::Location Loc = MemoryLocation::get(LI);
if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
// Ignore non-volatile stores to local memory. (Atomic is okay here.)
if (!SI->isVolatile()) {
- AliasAnalysis::Location Loc = AA->getLocation(SI);
+ AliasAnalysis::Location Loc = MemoryLocation::get(SI);
if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
} else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
// Ignore vaargs on local memory.
- AliasAnalysis::Location Loc = AA->getLocation(VI);
+ AliasAnalysis::Location Loc = MemoryLocation::get(VI);
if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 91a5eef..052f1b4 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -389,11 +389,21 @@ private:
class FunctionNode {
- AssertingVH<Function> F;
+ mutable AssertingVH<Function> F;
FunctionNode(Function *F) : F(F) {}
Function *getFunc() const { return F; }
+ /// Replace the reference to the function F by the function G, assuming their
+ /// implementations are equal.
+ void replaceBy(Function *G) const {
+ assert(!(*this < FunctionNode(G)) && !(FunctionNode(G) < *this) &&
+ "The two functions must be equal");
+ F = G;
+ }
void release() { F = 0; }
bool operator<(const FunctionNode &RHS) const {
return (FunctionComparator(F, RHS.getFunc()).compare()) == -1;
@@ -1122,6 +1132,9 @@ private:
/// Replace G with an alias to F. Deletes G.
void writeAlias(Function *F, Function *G);
+ /// Replace function F with function G in the function tree.
+ void replaceFunctionInTree(FnTreeType::iterator &IterToF, Function *G);
/// The set of all distinct functions. Use the insert() and remove() methods
/// to modify it.
FnTreeType FnTree;
@@ -1414,6 +1427,21 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
+/// Replace function F with function G in the function tree.
+void MergeFunctions::replaceFunctionInTree(FnTreeType::iterator &IterToF,
+ Function *G) {
+ Function *F = IterToF->getFunc();
+ // A total order is already guaranteed otherwise because we process strong
+ // functions before weak functions.
+ assert(((F->mayBeOverridden() && G->mayBeOverridden()) ||
+ (!F->mayBeOverridden() && !G->mayBeOverridden())) &&
+ "Only change functions if both are strong or both are weak");
+ (void)F;
+ IterToF->replaceBy(G);
// Insert a ComparableFunction into the FnTree, or merge it away if equal to one
// that was already inserted.
bool MergeFunctions::insert(Function *NewFunction) {
@@ -1439,6 +1467,22 @@ bool MergeFunctions::insert(Function *NewFunction) {
+ // Impose a total order (by name) on the replacement of functions. This is
+ // important when operating on more than one module independently to prevent
+ // cycles of thunks calling each other when the modules are linked together.
+ //
+ // When one function is weak and the other is strong there is an order imposed
+ // already. We process strong functions before weak functions.
+ if ((OldF.getFunc()->mayBeOverridden() && NewFunction->mayBeOverridden()) ||
+ (!OldF.getFunc()->mayBeOverridden() && !NewFunction->mayBeOverridden()))
+ if (OldF.getFunc()->getName() > NewFunction->getName()) {
+ // Swap the two functions.
+ Function *F = OldF.getFunc();
+ replaceFunctionInTree(Result.first, NewFunction);
+ NewFunction = F;
+ assert(OldF.getFunc() != F && "Must have swapped the functions.");
+ }
// Never thunk a strong function to a weak function.
assert(!OldF.getFunc()->mayBeOverridden() || NewFunction->mayBeOverridden());
@@ -1465,7 +1509,7 @@ void MergeFunctions::remove(Function *F) {
if (Erased) {
DEBUG(dbgs() << "Removed " << F->getName()
<< " from set and deferred it.\n");
- Deferred.push_back(F);
+ Deferred.emplace_back(F);
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2dafa58..f53eeef 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2149,6 +2149,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
if (WillNotOverflowSignedAdd(LHS, RHS, OrigI))
return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(),
+ break;
@@ -2194,6 +2195,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
if (WillNotOverflowSignedMul(LHS, RHS, OrigI))
return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(),
+ break;
return false;
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 5aa59c6..e7a4533 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -483,12 +483,17 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
// Fold away bit casts of the loaded value by loading the desired type.
+ // We can do this for BitCastInsts as well as casts from and to pointer types,
+ // as long as those are noops (i.e., the source or dest type has the same
+ // bitwidth as the target's pointers).
if (LI.hasOneUse())
- if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) {
- LoadInst *NewLoad = combineLoadToNewType(IC, LI, BC->getDestTy());
- BC->replaceAllUsesWith(NewLoad);
- IC.EraseInstFromFunction(*BC);
- return &LI;
+ if (auto* CI = dyn_cast<CastInst>(LI.user_back())) {
+ if (CI->isNoopCast(DL)) {
+ LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
+ CI->replaceAllUsesWith(NewLoad);
+ IC.EraseInstFromFunction(*CI);
+ return &LI;
+ }
// FIXME: We should also canonicalize loads of vectors when their elements are
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index d2fbcdd..f51442a 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -276,72 +276,6 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
return nullptr;
-/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
-/// replaced with RepOp.
-static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const TargetLibraryInfo *TLI,
- const DataLayout &DL, DominatorTree *DT,
- AssumptionCache *AC) {
- // Trivial replacement.
- if (V == Op)
- return RepOp;
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I)
- return nullptr;
- // If this is a binary operator, try to simplify it with the replaced op.
- if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
- if (B->getOperand(0) == Op)
- return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), DL, TLI);
- if (B->getOperand(1) == Op)
- return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, DL, TLI);
- }
- // Same for CmpInsts.
- if (CmpInst *C = dyn_cast<CmpInst>(I)) {
- if (C->getOperand(0) == Op)
- return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), DL,
- TLI, DT, AC);
- if (C->getOperand(1) == Op)
- return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, DL,
- TLI, DT, AC);
- }
- // TODO: We could hand off more cases to instsimplify here.
- // If all operands are constant after substituting Op for RepOp then we can
- // constant fold the instruction.
- if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) {
- // Build a list of all constant operands.
- SmallVector<Constant*, 8> ConstOps;
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- if (I->getOperand(i) == Op)
- ConstOps.push_back(CRepOp);
- else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i)))
- ConstOps.push_back(COp);
- else
- break;
- }
- // All operands were constants, fold it.
- if (ConstOps.size() == I->getNumOperands()) {
- if (CmpInst *C = dyn_cast<CmpInst>(I))
- return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
- ConstOps[1], DL, TLI);
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- if (!LI->isVolatile())
- return ConstantFoldLoadFromConstPtr(ConstOps[0], DL);
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps,
- DL, TLI);
- }
- }
- return nullptr;
/// foldSelectICmpAndOr - We want to turn:
/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
/// into:
@@ -477,14 +411,6 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
// here, so make sure the select is the only user.
if (ICI->hasOneUse())
if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
- // X < MIN ? T : F --> F
- if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT)
- && CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
- return ReplaceInstUsesWith(SI, FalseVal);
- // X > MAX ? T : F --> F
- else if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT)
- && CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
- return ReplaceInstUsesWith(SI, FalseVal);
switch (Pred) {
default: break;
case ICmpInst::ICMP_ULT:
@@ -598,33 +524,6 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
- // If we have an equality comparison then we know the value in one of the
- // arms of the select. See if substituting this value into the arm and
- // simplifying the result yields the same value as the other arm.
- if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
- TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
- TrueVal)
- return ReplaceInstUsesWith(SI, FalseVal);
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
- FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
- FalseVal)
- return ReplaceInstUsesWith(SI, FalseVal);
- } else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
- FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
- FalseVal)
- return ReplaceInstUsesWith(SI, TrueVal);
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
- TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
- TrueVal)
- return ReplaceInstUsesWith(SI, TrueVal);
- }
// NOTE: if we wanted to, this is where to detect integer MIN/MAX
if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS)) {
@@ -639,7 +538,8 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
- if (unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits()) {
+ {
+ unsigned BitWidth = DL.getTypeSizeInBits(TrueVal->getType());
APInt MinSignedValue = APInt::getSignBit(BitWidth);
Value *X;
const APInt *Y, *C;
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index be49cd1..9d602c6 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1843,7 +1843,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
case Instruction::BitCast:
case Instruction::GetElementPtr:
- Users.push_back(I);
+ Users.emplace_back(I);
@@ -1852,7 +1852,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
// We can fold eq/ne comparisons with null to false/true, respectively.
if (!ICI->isEquality() || !isa<ConstantPointerNull>(ICI->getOperand(1)))
return false;
- Users.push_back(I);
+ Users.emplace_back(I);
@@ -1878,13 +1878,13 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::objectsize:
- Users.push_back(I);
+ Users.emplace_back(I);
if (isFreeCall(I, TLI)) {
- Users.push_back(I);
+ Users.emplace_back(I);
return false;
@@ -1893,7 +1893,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
StoreInst *SI = cast<StoreInst>(I);
if (SI->isVolatile() || SI->getPointerOperand() != PI)
return false;
- Users.push_back(I);
+ Users.emplace_back(I);
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 939e04b..25f78b0 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -106,14 +106,15 @@ static const char *const kAsanUnpoisonStackMemoryName =
static const char *const kAsanOptionDetectUAR =
+static const char *const kAsanAllocaPoison =
+ "__asan_alloca_poison";
+static const char *const kAsanAllocasUnpoison =
+ "__asan_allocas_unpoison";
// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
static const unsigned kAllocaRzSize = 32;
-static const unsigned kAsanAllocaLeftMagic = 0xcacacacaU;
-static const unsigned kAsanAllocaRightMagic = 0xcbcbcbcbU;
-static const unsigned kAsanAllocaPartialVal1 = 0xcbcbcb00U;
-static const unsigned kAsanAllocaPartialVal2 = 0x000000cbU;
// Command-line flags.
@@ -230,8 +231,6 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
- "Number of instrumented dynamic allocas");
"Number of optimized accesses to global vars");
@@ -402,6 +401,12 @@ struct AddressSanitizer : public FunctionPass {
/// Check if we want (and can) handle this alloca.
bool isInterestingAlloca(AllocaInst &AI);
+ // Check whether AI is a dynamic alloca.
+ bool isDynamicAlloca(AllocaInst &AI) const {
+ return AI.isArrayAllocation() || !AI.isStaticAlloca();
+ }
/// If it is an interesting memory access, return the PointerOperand
/// and set IsWrite/Alignment. Otherwise return nullptr.
Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
@@ -517,6 +522,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
*AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc;
+ Function *AsanAllocaPoisonFunc, *AsanAllocasUnpoisonFunc;
// Stores a place and arguments of poisoning/unpoisoning call for alloca.
struct AllocaPoisonCall {
@@ -527,23 +533,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
SmallVector<AllocaPoisonCall, 8> AllocaPoisonCallVec;
- // Stores left and right redzone shadow addresses for dynamic alloca
- // and pointer to alloca instruction itself.
- // LeftRzAddr is a shadow address for alloca left redzone.
- // RightRzAddr is a shadow address for alloca right redzone.
- struct DynamicAllocaCall {
- AllocaInst *AI;
- Value *LeftRzAddr;
- Value *RightRzAddr;
- bool Poison;
- explicit DynamicAllocaCall(AllocaInst *AI, Value *LeftRzAddr = nullptr,
- Value *RightRzAddr = nullptr)
- : AI(AI),
- LeftRzAddr(LeftRzAddr),
- RightRzAddr(RightRzAddr),
- Poison(true) {}
- };
- SmallVector<DynamicAllocaCall, 1> DynamicAllocaVec;
+ SmallVector<AllocaInst *, 1> DynamicAllocaVec;
+ SmallVector<IntrinsicInst *, 1> StackRestoreVec;
+ AllocaInst *DynamicAllocaLayout = nullptr;
// Maps Value to an AllocaInst from which the Value is originated.
typedef DenseMap<Value *, AllocaInst *> AllocaForValueMapTy;
@@ -586,41 +578,29 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
// Then unpoison everything back before the function returns.
void poisonStack();
+ void createDynamicAllocasInitStorage();
// ----------------------- Visitors.
/// \brief Collect all Ret instructions.
void visitReturnInst(ReturnInst &RI) { RetVec.push_back(&RI); }
- // Unpoison dynamic allocas redzones.
- void unpoisonDynamicAlloca(DynamicAllocaCall &AllocaCall) {
- if (!AllocaCall.Poison) return;
- for (auto Ret : RetVec) {
- IRBuilder<> IRBRet(Ret);
- PointerType *Int32PtrTy = PointerType::getUnqual(IRBRet.getInt32Ty());
- Value *Zero = Constant::getNullValue(IRBRet.getInt32Ty());
- Value *PartialRzAddr = IRBRet.CreateSub(AllocaCall.RightRzAddr,
- ConstantInt::get(IntptrTy, 4));
- IRBRet.CreateStore(
- Zero, IRBRet.CreateIntToPtr(AllocaCall.LeftRzAddr, Int32PtrTy));
- IRBRet.CreateStore(Zero,
- IRBRet.CreateIntToPtr(PartialRzAddr, Int32PtrTy));
- IRBRet.CreateStore(
- Zero, IRBRet.CreateIntToPtr(AllocaCall.RightRzAddr, Int32PtrTy));
- }
+ void unpoisonDynamicAllocasBeforeInst(Instruction *InstBefore,
+ Value *SavedStack) {
+ IRBuilder<> IRB(InstBefore);
+ IRB.CreateCall(AsanAllocasUnpoisonFunc,
+ {IRB.CreateLoad(DynamicAllocaLayout),
+ IRB.CreatePtrToInt(SavedStack, IntptrTy)});
- // Right shift for BigEndian and left shift for LittleEndian.
- Value *shiftAllocaMagic(Value *Val, IRBuilder<> &IRB, Value *Shift) {
- auto &DL = F.getParent()->getDataLayout();
- return DL.isLittleEndian() ? IRB.CreateShl(Val, Shift)
- : IRB.CreateLShr(Val, Shift);
- }
+ // Unpoison dynamic allocas redzones.
+ void unpoisonDynamicAllocas() {
+ for (auto &Ret : RetVec)
+ unpoisonDynamicAllocasBeforeInst(Ret, DynamicAllocaLayout);
- // Compute PartialRzMagic for dynamic alloca call. Since we don't know the
- // size of requested memory until runtime, we should compute it dynamically.
- // If PartialSize is 0, PartialRzMagic would contain kAsanAllocaRightMagic,
- // otherwise it would contain the value that we will use to poison the
- // partial redzone for alloca call.
- Value *computePartialRzMagic(Value *PartialSize, IRBuilder<> &IRB);
+ for (auto &StackRestoreInst : StackRestoreVec)
+ unpoisonDynamicAllocasBeforeInst(StackRestoreInst,
+ StackRestoreInst->getOperand(0));
+ }
// Deploy and poison redzones around dynamic alloca call. To do this, we
// should replace this call with another one with changed parameters and
@@ -632,20 +612,15 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
// addr = tmp + 32 (first 32 bytes are for the left redzone).
// Additional_size is added to make new memory allocation contain not only
// requested memory, but also left, partial and right redzones.
- // After that, we should poison redzones:
- // (1) Left redzone with kAsanAllocaLeftMagic.
- // (2) Partial redzone with the value, computed in runtime by
- // computePartialRzMagic function.
- // (3) Right redzone with kAsanAllocaRightMagic.
- void handleDynamicAllocaCall(DynamicAllocaCall &AllocaCall);
+ void handleDynamicAllocaCall(AllocaInst *AI);
/// \brief Collect Alloca instructions we want (and can) handle.
void visitAllocaInst(AllocaInst &AI) {
if (!ASan.isInterestingAlloca(AI)) return;
StackAlignment = std::max(StackAlignment, AI.getAlignment());
- if (isDynamicAlloca(AI))
- DynamicAllocaVec.push_back(DynamicAllocaCall(&AI));
+ if (ASan.isDynamicAlloca(AI))
+ DynamicAllocaVec.push_back(&AI);
@@ -653,8 +628,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
/// \brief Collect lifetime intrinsic calls to check for use-after-scope
/// errors.
void visitIntrinsicInst(IntrinsicInst &II) {
- if (!ClCheckLifetime) return;
Intrinsic::ID ID = II.getIntrinsicID();
+ if (ID == Intrinsic::stackrestore) StackRestoreVec.push_back(&II);
+ if (!ClCheckLifetime) return;
if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end)
// Found lifetime intrinsic, add ASan instrumentation if necessary.
@@ -690,9 +666,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
return true;
- bool isDynamicAlloca(AllocaInst &AI) const {
- return AI.isArrayAllocation() || !AI.isStaticAlloca();
- }
/// Finds alloca where the value comes from.
AllocaInst *findAllocaForValue(Value *V);
void poisonRedZones(ArrayRef<uint8_t> ShadowBytes, IRBuilder<> &IRB,
@@ -811,12 +784,14 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) {
if (PreviouslySeenAllocaInfo != ProcessedAllocas.end())
return PreviouslySeenAllocaInfo->getSecond();
- bool IsInteresting = (AI.getAllocatedType()->isSized() &&
- // alloca() may be called with 0 size, ignore it.
- getAllocaSizeInBytes(&AI) > 0 &&
- // We are only interested in allocas not promotable to registers.
- // Promotable allocas are common under -O0.
- (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)));
+ bool IsInteresting =
+ (AI.getAllocatedType()->isSized() &&
+ // alloca() may be called with 0 size, ignore it.
+ getAllocaSizeInBytes(&AI) > 0 &&
+ // We are only interested in allocas not promotable to registers.
+ // Promotable allocas are common under -O0.
+ (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI) ||
+ isDynamicAlloca(AI)));
ProcessedAllocas[&AI] = IsInteresting;
return IsInteresting;
@@ -1158,6 +1133,18 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
if (G->hasSection()) {
StringRef Section(G->getSection());
+ // Globals from llvm.metadata aren't emitted, do not instrument them.
+ if (Section == "llvm.metadata") return false;
+ // Callbacks put into the CRT initializer/terminator sections
+ // should not be instrumented.
+ // See
+ // and
+ if (Section.startswith(".CRT")) {
+ DEBUG(dbgs() << "Ignoring a global initializer callback: " << *G << "\n");
+ return false;
+ }
if (TargetTriple.isOSBinFormatMachO()) {
StringRef ParsedSegment, ParsedSection;
unsigned TAA = 0, StubSize = 0;
@@ -1165,8 +1152,8 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(
Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize);
if (!ErrorCode.empty()) {
- report_fatal_error("Invalid section specifier '" + ParsedSection +
- "': " + ErrorCode + ".");
+ assert(false && "Invalid section specifier.");
+ return false;
// Ignore the globals from the __OBJC section. The ObjC runtime assumes
@@ -1196,18 +1183,6 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
return false;
- // Callbacks put into the CRT initializer/terminator sections
- // should not be instrumented.
- // See
- // and
- if (Section.startswith(".CRT")) {
- DEBUG(dbgs() << "Ignoring a global initializer callback: " << *G << "\n");
- return false;
- }
- // Globals from llvm.metadata aren't emitted, do not instrument them.
- if (Section == "llvm.metadata") return false;
return true;
@@ -1617,6 +1592,11 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(),
IntptrTy, IntptrTy, nullptr));
+ AsanAllocaPoisonFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
+ AsanAllocasUnpoisonFunc =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
void FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
@@ -1712,15 +1692,24 @@ Value *FunctionStackPoisoner::createAllocaForLayout(
return IRB.CreatePointerCast(Alloca, IntptrTy);
+void FunctionStackPoisoner::createDynamicAllocasInitStorage() {
+ BasicBlock &FirstBB = *F.begin();
+ IRBuilder<> IRB(dyn_cast<Instruction>(FirstBB.begin()));
+ DynamicAllocaLayout = IRB.CreateAlloca(IntptrTy, nullptr);
+ IRB.CreateStore(Constant::getNullValue(IntptrTy), DynamicAllocaLayout);
+ DynamicAllocaLayout->setAlignment(32);
void FunctionStackPoisoner::poisonStack() {
assert(AllocaVec.size() > 0 || DynamicAllocaVec.size() > 0);
- if (ClInstrumentAllocas) {
+ if (ClInstrumentAllocas && DynamicAllocaVec.size() > 0) {
// Handle dynamic allocas.
- for (auto &AllocaCall : DynamicAllocaVec) {
- handleDynamicAllocaCall(AllocaCall);
- unpoisonDynamicAlloca(AllocaCall);
- }
+ createDynamicAllocasInitStorage();
+ for (auto &AI : DynamicAllocaVec)
+ handleDynamicAllocaCall(AI);
+ unpoisonDynamicAllocas();
if (AllocaVec.size() == 0) return;
@@ -1955,78 +1944,25 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
return Res;
-// Compute PartialRzMagic for dynamic alloca call. PartialRzMagic is
-// constructed from two separate 32-bit numbers: PartialRzMagic = Val1 | Val2.
-// (1) Val1 is resposible for forming base value for PartialRzMagic, containing
-// only 00 for fully addressable and 0xcb for fully poisoned bytes for each
-// 8-byte chunk of user memory respectively.
-// (2) Val2 forms the value for marking first poisoned byte in shadow memory
-// with appropriate value (0x01 - 0x07 or 0xcb if Padding % 8 == 0).
-// Shift = Padding & ~7; // the number of bits we need to shift to access first
-// chunk in shadow memory, containing nonzero bytes.
-// Example:
-// Padding = 21 Padding = 16
-// Shadow: |00|00|05|cb| Shadow: |00|00|cb|cb|
-// ^ ^
-// | |
-// Shift = 21 & ~7 = 16 Shift = 16 & ~7 = 16
-// Val1 = 0xcbcbcbcb << Shift;
-// PartialBits = Padding ? Padding & 7 : 0xcb;
-// Val2 = PartialBits << Shift;
-// Result = Val1 | Val2;
-Value *FunctionStackPoisoner::computePartialRzMagic(Value *PartialSize,
- IRBuilder<> &IRB) {
- PartialSize = IRB.CreateIntCast(PartialSize, IRB.getInt32Ty(), false);
- Value *Shift = IRB.CreateAnd(PartialSize, IRB.getInt32(~7));
- unsigned Val1Int = kAsanAllocaPartialVal1;
- unsigned Val2Int = kAsanAllocaPartialVal2;
- if (!F.getParent()->getDataLayout().isLittleEndian()) {
- Val1Int = sys::getSwappedBytes(Val1Int);
- Val2Int = sys::getSwappedBytes(Val2Int);
- }
- Value *Val1 = shiftAllocaMagic(IRB.getInt32(Val1Int), IRB, Shift);
- Value *PartialBits = IRB.CreateAnd(PartialSize, IRB.getInt32(7));
- // For BigEndian get 0x000000YZ -> 0xYZ000000.
- if (F.getParent()->getDataLayout().isBigEndian())
- PartialBits = IRB.CreateShl(PartialBits, IRB.getInt32(24));
- Value *Val2 = IRB.getInt32(Val2Int);
- Value *Cond =
- IRB.CreateICmpNE(PartialBits, Constant::getNullValue(IRB.getInt32Ty()));
- Val2 = IRB.CreateSelect(Cond, shiftAllocaMagic(PartialBits, IRB, Shift),
- shiftAllocaMagic(Val2, IRB, Shift));
- return IRB.CreateOr(Val1, Val2);
-void FunctionStackPoisoner::handleDynamicAllocaCall(
- DynamicAllocaCall &AllocaCall) {
- AllocaInst *AI = AllocaCall.AI;
- if (!doesDominateAllExits(AI)) {
- // We do not yet handle complex allocas
- AllocaCall.Poison = false;
- return;
- }
+void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
IRBuilder<> IRB(AI);
- PointerType *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
const unsigned Align = std::max(kAllocaRzSize, AI->getAlignment());
const uint64_t AllocaRedzoneMask = kAllocaRzSize - 1;
Value *Zero = Constant::getNullValue(IntptrTy);
Value *AllocaRzSize = ConstantInt::get(IntptrTy, kAllocaRzSize);
Value *AllocaRzMask = ConstantInt::get(IntptrTy, AllocaRedzoneMask);
- Value *NotAllocaRzMask = ConstantInt::get(IntptrTy, ~AllocaRedzoneMask);
// Since we need to extend alloca with additional memory to locate
// redzones, and OldSize is number of allocated blocks with
// ElementSize size, get allocated memory size in bytes by
// OldSize * ElementSize.
- unsigned ElementSize =
+ const unsigned ElementSize =
- Value *OldSize = IRB.CreateMul(AI->getArraySize(),
- ConstantInt::get(IntptrTy, ElementSize));
+ Value *OldSize =
+ IRB.CreateMul(IRB.CreateIntCast(AI->getArraySize(), IntptrTy, false),
+ ConstantInt::get(IntptrTy, ElementSize));
// PartialSize = OldSize % 32
Value *PartialSize = IRB.CreateAnd(OldSize, AllocaRzMask);
@@ -2054,43 +1990,20 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(
Value *NewAddress = IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy),
ConstantInt::get(IntptrTy, Align));
- Value *NewAddressPtr = IRB.CreateIntToPtr(NewAddress, AI->getType());
- // LeftRzAddress = NewAddress - kAllocaRzSize
- Value *LeftRzAddress = IRB.CreateSub(NewAddress, AllocaRzSize);
- // Poisoning left redzone.
- AllocaCall.LeftRzAddr = ASan.memToShadow(LeftRzAddress, IRB);
- IRB.CreateStore(ConstantInt::get(IRB.getInt32Ty(), kAsanAllocaLeftMagic),
- IRB.CreateIntToPtr(AllocaCall.LeftRzAddr, Int32PtrTy));
+ // Insert __asan_alloca_poison call for new created alloca.
+ IRB.CreateCall(AsanAllocaPoisonFunc, {NewAddress, OldSize});
- // PartialRzAligned = PartialRzAddr & ~AllocaRzMask
- Value *PartialRzAddr = IRB.CreateAdd(NewAddress, OldSize);
- Value *PartialRzAligned = IRB.CreateAnd(PartialRzAddr, NotAllocaRzMask);
+ // Store the last alloca's address to DynamicAllocaLayout. We'll need this
+ // for unpoisoning stuff.
+ IRB.CreateStore(IRB.CreatePtrToInt(NewAlloca, IntptrTy), DynamicAllocaLayout);
- // Poisoning partial redzone.
- Value *PartialRzMagic = computePartialRzMagic(PartialSize, IRB);
- Value *PartialRzShadowAddr = ASan.memToShadow(PartialRzAligned, IRB);
- IRB.CreateStore(PartialRzMagic,
- IRB.CreateIntToPtr(PartialRzShadowAddr, Int32PtrTy));
- // RightRzAddress
- // = (PartialRzAddr + AllocaRzMask) & ~AllocaRzMask
- Value *RightRzAddress = IRB.CreateAnd(
- IRB.CreateAdd(PartialRzAddr, AllocaRzMask), NotAllocaRzMask);
- // Poisoning right redzone.
- AllocaCall.RightRzAddr = ASan.memToShadow(RightRzAddress, IRB);
- IRB.CreateStore(ConstantInt::get(IRB.getInt32Ty(), kAsanAllocaRightMagic),
- IRB.CreateIntToPtr(AllocaCall.RightRzAddr, Int32PtrTy));
+ Value *NewAddressPtr = IRB.CreateIntToPtr(NewAddress, AI->getType());
- // Replace all uses of AddessReturnedByAlloca with NewAddress.
+ // Replace all uses of AddressReturnedByAlloca with NewAddressPtr.
- // We are done. Erase old alloca and store left, partial and right redzones
- // shadow addresses for future unpoisoning.
+ // We are done. Erase old alloca from parent.
- NumInstrumentedDynamicAllocas++;
// isSafeAccess returns true if Addr is always inbounds with respect to its
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 610ff52..05a9c8a 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -146,8 +146,8 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
IRBuilder<> Builder(Inc->getParent(), *Inc);
uint64_t Index = Inc->getIndex()->getZExtValue();
- llvm::Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
- llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount");
+ Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
+ Value *Count = Builder.CreateLoad(Addr, "pgocount");
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr));
@@ -196,14 +196,17 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
if (It != RegionCounters.end())
return It->second;
- // Move the name variable to the right section.
+ // Move the name variable to the right section. Make sure it is placed in the
+ // same comdat as its associated function. Otherwise, we may get multiple
+ // counters for the same function in certain cases.
+ Function *Fn = Inc->getParent()->getParent();
+ Name->setComdat(Fn->getComdat());
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
LLVMContext &Ctx = M->getContext();
ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
- Function *Fn = Inc->getParent()->getParent();
// Create the counters variable.
auto *Counters = new GlobalVariable(*M, CounterTy, false, Name->getLinkage(),
@@ -212,9 +215,6 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
- // Place the counters in the same comdat section as its parent function.
- // Otherwise, we may get multiple counters for the same function in certain
- // cases.
RegionCounters[Inc->getName()] = Counters;
@@ -263,7 +263,7 @@ void InstrProfiling::emitRegistration() {
if (Options.NoRedZone)
- auto *RuntimeRegisterTy = llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
+ auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
auto *RuntimeRegisterF =
Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
"__llvm_profile_register_function", M);
@@ -310,7 +310,7 @@ void InstrProfiling::emitUses() {
GlobalVariable *LLVMUsed = M->getGlobalVariable("llvm.used");
- std::vector<Constant*> MergedVars;
+ std::vector<Constant *> MergedVars;
if (LLVMUsed) {
// Collect the existing members of llvm.used.
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
@@ -323,13 +323,13 @@ void InstrProfiling::emitUses() {
// Add uses for our data.
for (auto *Value : UsedVars)
- ConstantExpr::getBitCast(cast<llvm::Constant>(Value), i8PTy));
+ ConstantExpr::getBitCast(cast<Constant>(Value), i8PTy));
// Recreate llvm.used.
ArrayType *ATy = ArrayType::get(i8PTy, MergedVars.size());
- LLVMUsed = new llvm::GlobalVariable(
- *M, ATy, false, llvm::GlobalValue::AppendingLinkage,
- llvm::ConstantArray::get(ATy, MergedVars), "llvm.used");
+ LLVMUsed =
+ new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
+ ConstantArray::get(ATy, MergedVars), "llvm.used");
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 2a3139f..e7731ad 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -200,7 +200,7 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
bool SawRelease = false;
// Get the location associated with Load.
- AliasAnalysis::Location Loc = AA->getLocation(Load);
+ AliasAnalysis::Location Loc = MemoryLocation::get(Load);
// Walk down to find the store and the release, which may be in either order.
for (auto I = std::next(BasicBlock::iterator(Load)),
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index d1302c6..79624b2 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -113,10 +113,11 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
Value *Condition = SI->getCondition();
if (!Condition->getType()->isVectorTy()) {
- if (Constant *C = LVI->getConstantOnEdge(Condition, P->getIncomingBlock(i), BB, P)) {
- if (C == ConstantInt::getTrue(Condition->getType())) {
+ if (Constant *C = LVI->getConstantOnEdge(
+ Condition, P->getIncomingBlock(i), BB, P)) {
+ if (C->isOneValue()) {
V = SI->getTrueValue();
- } else {
+ } else if (C->isZeroValue()) {
V = SI->getFalseValue();
// Once LVI learns to handle vector types, we could also add support
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 01952cf..eb48a76 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -197,11 +197,11 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
static AliasAnalysis::Location
getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- return AA.getLocation(SI);
+ return MemoryLocation::get(SI);
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
// memcpy/memmove/memset.
- AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
+ AliasAnalysis::Location Loc = MemoryLocation::getForDest(MI);
return Loc;
@@ -231,7 +231,7 @@ getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
// The only instructions that both read and write are the mem transfer
// instructions (memcpy/memmove).
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
- return AA.getLocationForSource(MTI);
+ return MemoryLocation::getForSource(MTI);
return AliasAnalysis::Location();
@@ -815,11 +815,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
if (!L->isUnordered()) // Be conservative with atomic/volatile load
- LoadedLoc = AA->getLocation(L);
+ LoadedLoc = MemoryLocation::get(L);
} else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
- LoadedLoc = AA->getLocation(V);
+ LoadedLoc = MemoryLocation::get(V);
} else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
- LoadedLoc = AA->getLocationForSource(MTI);
+ LoadedLoc = MemoryLocation::getForSource(MTI);
} else if (!BBI->mayReadFromMemory()) {
// Instruction doesn't read memory. Note that stores that weren't removed
// above will hit this case.
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 600589c..359a616 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -68,6 +68,22 @@ static cl::opt<bool> VerifyIndvars(
static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
cl::desc("Reduce live induction variables."));
+enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };
+static cl::opt<ReplaceExitVal> ReplaceExitValue(
+ "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
+ cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
+ cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
+ clEnumValN(OnlyCheapRepl, "cheap",
+ "only replace exit value when the cost is cheap"),
+ clEnumValN(AlwaysRepl, "always",
+ "always replace exit value whenever possible"),
+ clEnumValEnd));
+namespace {
+struct RewritePhi;
namespace {
class IndVarSimplify : public LoopPass {
LoopInfo *LI;
@@ -112,6 +128,7 @@ namespace {
void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
+ bool CanLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
@@ -464,6 +481,21 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
+namespace {
+// Collect information about PHI nodes which can be transformed in
+// RewriteLoopExitValues.
+struct RewritePhi {
+ PHINode *PN;
+ unsigned Ith; // Ith incoming value.
+ Value *Val; // Exit value after expansion.
+ bool HighCost; // High Cost when expansion.
+ bool SafePhi; // LCSSASafePhiForRAUW.
+ RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S)
+ : PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {}
// RewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
@@ -486,6 +518,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 8> ExitBlocks;
+ SmallVector<RewritePhi, 8> RewritePhiSet;
// Find all values that are computed inside the loop, but used outside of it.
// Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
// the exit blocks of the loop to find them.
@@ -604,23 +637,44 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
- Changed = true;
- ++NumReplaced;
+ bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L);
- PN->setIncomingValue(i, ExitVal);
+ // Collect all the candidate PHINodes to be rewritten.
+ RewritePhiSet.push_back(
+ RewritePhi(PN, i, ExitVal, HighCost, LCSSASafePhiForRAUW));
+ }
+ }
+ }
- // If this instruction is dead now, delete it. Don't do it now to avoid
- // invalidating iterators.
- if (isInstructionTriviallyDead(Inst, TLI))
- DeadInsts.push_back(Inst);
+ bool LoopCanBeDel = CanLoopBeDeleted(L, RewritePhiSet);
- // If we determined that this PHI is safe to replace even if an LCSSA
- // PHI, do so.
- if (LCSSASafePhiForRAUW) {
- PN->replaceAllUsesWith(ExitVal);
- PN->eraseFromParent();
- }
- }
+ // Transformation.
+ for (const RewritePhi &Phi : RewritePhiSet) {
+ PHINode *PN = Phi.PN;
+ Value *ExitVal = Phi.Val;
+ // Only do the rewrite when the ExitValue can be expanded cheaply.
+ // If LoopCanBeDel is true, rewrite exit value aggressively.
+ if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
+ DeadInsts.push_back(ExitVal);
+ continue;
+ }
+ Changed = true;
+ ++NumReplaced;
+ Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
+ PN->setIncomingValue(Phi.Ith, ExitVal);
+ // If this instruction is dead now, delete it. Don't do it now to avoid
+ // invalidating iterators.
+ if (isInstructionTriviallyDead(Inst, TLI))
+ DeadInsts.push_back(Inst);
+ // If we determined that this PHI is safe to replace even if an LCSSA
+ // PHI, do so.
+ if (Phi.SafePhi) {
+ PN->replaceAllUsesWith(ExitVal);
+ PN->eraseFromParent();
@@ -629,6 +683,65 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
+/// CanLoopBeDeleted - Check whether it is possible to delete the loop after
+/// rewriting exit value. If it is possible, ignore ReplaceExitValue and
+/// do rewriting aggressively.
+bool IndVarSimplify::CanLoopBeDeleted(
+ Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ // If there is no preheader, the loop will not be deleted.
+ if (!Preheader)
+ return false;
+ // The LoopDeletion pass can delete a loop even when ExitingBlocks.size() > 1.
+ // We skip the multiple-ExitingBlocks case here for simplicity.
+ // TODO: If we see a testcase where a loop with multiple ExitingBlocks can be
+ // deleted after exit value rewriting, we can enhance the logic here.
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
+ return false;
+ BasicBlock *ExitBlock = ExitBlocks[0];
+ BasicBlock::iterator BI = ExitBlock->begin();
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
+ Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
+ // If the Incoming value of P is found in RewritePhiSet, we know it
+ // could be rewritten to use a loop invariant value in transformation
+ // phase later. Skip it in the loop invariant check below.
+ bool found = false;
+ for (const RewritePhi &Phi : RewritePhiSet) {
+ unsigned i = Phi.Ith;
+ if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
+ found = true;
+ break;
+ }
+ }
+ Instruction *I;
+ if (!found && (I = dyn_cast<Instruction>(Incoming)))
+ if (!L->hasLoopInvariantOperands(I))
+ return false;
+ ++BI;
+ }
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI) {
+ for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); BI != BE;
+ ++BI) {
+ if (BI->mayHaveSideEffects())
+ return false;
+ }
+ }
+ return true;
// IV Widening - Extend the width of an IV to cover its widest uses.
@@ -989,7 +1102,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
IRBuilder<> Builder(WidePhi->getParent()->getFirstInsertionPt());
Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
- DeadInsts.push_back(UsePhi);
+ DeadInsts.emplace_back(UsePhi);
DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
<< " to " << *WidePhi << "\n");
@@ -1022,7 +1135,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
<< " replaced by " << *DU.WideDef << "\n");
- DeadInsts.push_back(DU.NarrowUse);
+ DeadInsts.emplace_back(DU.NarrowUse);
// Now that the extend is gone, we want to expose its uses for potential
// further simplification. We don't need to directly inform SimplifyIVUsers
@@ -1075,7 +1188,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
if (WideAddRec != SE->getSCEV(WideUse)) {
DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
<< ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
- DeadInsts.push_back(WideUse);
+ DeadInsts.emplace_back(WideUse);
return nullptr;
@@ -1172,7 +1285,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// WidenIVUse may have removed the def-use edge.
if (DU.NarrowDef->use_empty())
- DeadInsts.push_back(DU.NarrowDef);
+ DeadInsts.emplace_back(DU.NarrowDef);
return WidePhi;
@@ -1867,7 +1980,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// loop into any instructions outside of the loop that use the final values of
// the current expressions.
- if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ if (ReplaceExitValue != NeverRepl &&
+ !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
RewriteLoopExitValues(L, Rewriter);
// Eliminate redundant IV cycles.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 584c7ae..4b59f3d 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -811,7 +811,7 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
if (Instruction *U = dyn_cast<Instruction>(O)) {
O = nullptr;
if (U->use_empty())
- DeadInsts.push_back(U);
+ DeadInsts.emplace_back(U);
@@ -2917,7 +2917,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
- DeadInsts.push_back(Inc.IVOperand);
+ DeadInsts.emplace_back(Inc.IVOperand);
// If LSR created a new, wider phi, we may also replace its postinc. We only
// do this if we also found a wide value for the head of the chain.
@@ -2939,7 +2939,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
Phi->replaceUsesOfWith(PostIncV, IVOper);
- DeadInsts.push_back(PostIncV);
+ DeadInsts.emplace_back(PostIncV);
@@ -4594,7 +4594,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// form, update the ICmp's other operand.
if (LU.Kind == LSRUse::ICmpZero) {
ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
- DeadInsts.push_back(CI->getOperand(1));
+ DeadInsts.emplace_back(CI->getOperand(1));
assert(!F.BaseGV && "ICmp does not support folding a global value and "
"a scale at the same time!");
if (F.Scale == -1) {
@@ -4737,7 +4737,7 @@ void LSRInstance::Rewrite(const LSRFixup &LF,
LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
- DeadInsts.push_back(LF.OperandValToReplace);
+ DeadInsts.emplace_back(LF.OperandValToReplace);
/// ImplementSolution - Rewrite all the fixup locations with new values,
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index ccafd10..4ccbfc9 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -38,25 +38,25 @@ using namespace llvm;
#define DEBUG_TYPE "loop-unroll"
static cl::opt<unsigned>
-UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
- cl::desc("The cut-off point for automatic loop unrolling"));
+ UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
+ cl::desc("The baseline cost threshold for loop unrolling"));
+static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold(
+ "unroll-percent-dynamic-cost-saved-threshold", cl::init(20), cl::Hidden,
+ cl::desc("The percentage of estimated dynamic cost which must be saved by "
+ "unrolling to allow unrolling up to the max threshold."));
+// Hidden option; its help text refers to the companion flag
+// "unroll-percent-dynamic-cost-saved-threshold" by its registered name.
+static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount(
+ "unroll-dynamic-cost-savings-discount", cl::init(2000), cl::Hidden,
+ cl::desc("This is the amount discounted from the total unroll cost when "
+ "the unrolled form has a high dynamic cost savings (triggered by "
+ "the '-unroll-percent-dynamic-cost-saved-threshold' flag)."));
// Hidden option. The description is built from adjacent string literals, so
// the first piece must end with a space to keep the words separated.
static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
"unroll-max-iteration-count-to-analyze", cl::init(0), cl::Hidden,
cl::desc("Don't allow loop unrolling to simulate more than this number of "
"iterations when checking full unroll profitability"));
-static cl::opt<unsigned> UnrollMinPercentOfOptimized(
- "unroll-percent-of-optimized-for-complete-unroll", cl::init(20), cl::Hidden,
- cl::desc("If complete unrolling could trigger further optimizations, and, "
- "by that, remove the given percent of instructions, perform the "
- "complete unroll even if it's beyond the threshold"));
-static cl::opt<unsigned> UnrollAbsoluteThreshold(
- "unroll-absolute-threshold", cl::init(2000), cl::Hidden,
- cl::desc("Don't unroll if the unrolled size is bigger than this threshold,"
- " even if we can remove big portion of instructions later."));
static cl::opt<unsigned>
UnrollCount("unroll-count", cl::init(0), cl::Hidden,
cl::desc("Use this unroll count for all loops including those with "
@@ -82,16 +82,18 @@ namespace {
static char ID; // Pass ID, replacement for typeid
LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
- CurrentAbsoluteThreshold = UnrollAbsoluteThreshold;
- CurrentMinPercentOfOptimized = UnrollMinPercentOfOptimized;
+ CurrentPercentDynamicCostSavedThreshold =
+ UnrollPercentDynamicCostSavedThreshold;
+ CurrentDynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R;
UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
- UserAbsoluteThreshold = (UnrollAbsoluteThreshold.getNumOccurrences() > 0);
- UserPercentOfOptimized =
- (UnrollMinPercentOfOptimized.getNumOccurrences() > 0);
+ UserPercentDynamicCostSavedThreshold =
+ (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0);
+ UserDynamicCostSavingsDiscount =
+ (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0);
UserAllowPartial = (P != -1) ||
(UnrollAllowPartial.getNumOccurrences() > 0);
UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0);
@@ -115,18 +117,18 @@ namespace {
unsigned CurrentCount;
unsigned CurrentThreshold;
- unsigned CurrentAbsoluteThreshold;
- unsigned CurrentMinPercentOfOptimized;
- bool CurrentAllowPartial;
- bool CurrentRuntime;
- bool UserCount; // CurrentCount is user-specified.
- bool UserThreshold; // CurrentThreshold is user-specified.
- bool UserAbsoluteThreshold; // CurrentAbsoluteThreshold is
- // user-specified.
- bool UserPercentOfOptimized; // CurrentMinPercentOfOptimized is
- // user-specified.
- bool UserAllowPartial; // CurrentAllowPartial is user-specified.
- bool UserRuntime; // CurrentRuntime is user-specified.
+ unsigned CurrentPercentDynamicCostSavedThreshold;
+ unsigned CurrentDynamicCostSavingsDiscount;
+ bool CurrentAllowPartial;
+ bool CurrentRuntime;
+ // Flags for whether the 'current' settings are user-specified.
+ bool UserCount;
+ bool UserThreshold;
+ bool UserPercentDynamicCostSavedThreshold;
+ bool UserDynamicCostSavingsDiscount;
+ bool UserAllowPartial;
+ bool UserRuntime;
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -156,8 +158,9 @@ namespace {
void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI,
TargetTransformInfo::UnrollingPreferences &UP) {
UP.Threshold = CurrentThreshold;
- UP.AbsoluteThreshold = CurrentAbsoluteThreshold;
- UP.MinPercentOfOptimized = CurrentMinPercentOfOptimized;
+ UP.PercentDynamicCostSavedThreshold =
+ CurrentPercentDynamicCostSavedThreshold;
+ UP.DynamicCostSavingsDiscount = CurrentDynamicCostSavingsDiscount;
UP.OptSizeThreshold = OptSizeUnrollThreshold;
UP.PartialThreshold = CurrentThreshold;
UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
@@ -186,8 +189,8 @@ namespace {
void selectThresholds(const Loop *L, bool HasPragma,
const TargetTransformInfo::UnrollingPreferences &UP,
unsigned &Threshold, unsigned &PartialThreshold,
- unsigned &AbsoluteThreshold,
- unsigned &PercentOfOptimizedForCompleteUnroll) {
+ unsigned &PercentDynamicCostSavedThreshold,
+ unsigned &DynamicCostSavingsDiscount) {
// Determine the current unrolling threshold. While this is
// normally set from UnrollThreshold, it is overridden to a
// smaller value if the current function is marked as
@@ -195,11 +198,13 @@ namespace {
// specified.
Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold;
- AbsoluteThreshold = UserAbsoluteThreshold ? CurrentAbsoluteThreshold
- : UP.AbsoluteThreshold;
- PercentOfOptimizedForCompleteUnroll = UserPercentOfOptimized
- ? CurrentMinPercentOfOptimized
- : UP.MinPercentOfOptimized;
+ PercentDynamicCostSavedThreshold =
+ UserPercentDynamicCostSavedThreshold
+ ? CurrentPercentDynamicCostSavedThreshold
+ : UP.PercentDynamicCostSavedThreshold;
+ DynamicCostSavingsDiscount = UserDynamicCostSavingsDiscount
+ ? CurrentDynamicCostSavingsDiscount
+ : UP.DynamicCostSavingsDiscount;
if (!UserThreshold &&
@@ -220,9 +225,9 @@ namespace {
bool canUnrollCompletely(Loop *L, unsigned Threshold,
- unsigned AbsoluteThreshold, uint64_t UnrolledSize,
- unsigned NumberOfOptimizedInstructions,
- unsigned PercentOfOptimizedForCompleteUnroll);
+ unsigned PercentDynamicCostSavedThreshold,
+ unsigned DynamicCostSavingsDiscount,
+ uint64_t UnrolledCost, uint64_t RolledDynamicCost);
@@ -246,187 +251,6 @@ Pass *llvm::createSimpleLoopUnrollPass() {
namespace {
-/// \brief SCEV expressions visitor used for finding expressions that would
-/// become constants if the loop L is unrolled.
-struct FindConstantPointers {
- /// \brief Shows whether the expression is ConstAddress+Constant or not.
- bool IndexIsConstant;
- /// \brief Used for filtering out SCEV expressions with two or more AddRec
- /// subexpressions.
- ///
- /// Used to filter out complicated SCEV expressions, having several AddRec
- /// sub-expressions. We don't handle them, because unrolling one loop
- /// would help to replace only one of these inductions with a constant, and
- /// consequently, the expression would remain non-constant.
- bool HaveSeenAR;
- /// \brief If the SCEV expression becomes ConstAddress+Constant, this value
- /// holds ConstAddress. Otherwise, it's nullptr.
- Value *BaseAddress;
- /// \brief The loop, which we try to completely unroll.
- const Loop *L;
- ScalarEvolution &SE;
- FindConstantPointers(const Loop *L, ScalarEvolution &SE)
- : IndexIsConstant(true), HaveSeenAR(false), BaseAddress(nullptr),
- L(L), SE(SE) {}
- /// Examine the given expression S and figure out, if it can be a part of an
- /// expression, that could become a constant after the loop is unrolled.
- /// The routine sets IndexIsConstant and HaveSeenAR according to the analysis
- /// results.
- /// \returns true if we need to examine subexpressions, and false otherwise.
- bool follow(const SCEV *S) {
- if (const SCEVUnknown *SC = dyn_cast<SCEVUnknown>(S)) {
- // We've reached the leaf node of SCEV, it's most probably just a
- // variable.
- // If it's the only one SCEV-subexpression, then it might be a base
- // address of an index expression.
- // If we've already recorded base address, then just give up on this SCEV
- // - it's too complicated.
- if (BaseAddress) {
- IndexIsConstant = false;
- return false;
- }
- BaseAddress = SC->getValue();
- return false;
- }
- if (isa<SCEVConstant>(S))
- return false;
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- // If the current SCEV expression is AddRec, and its loop isn't the loop
- // we are about to unroll, then we won't get a constant address after
- // unrolling, and thus, won't be able to eliminate the load.
- if (AR->getLoop() != L) {
- IndexIsConstant = false;
- return false;
- }
- // We don't handle multiple AddRecs here, so give up in this case.
- if (HaveSeenAR) {
- IndexIsConstant = false;
- return false;
- }
- HaveSeenAR = true;
- }
- // Continue traversal.
- return true;
- }
- bool isDone() const { return !IndexIsConstant; }
-} // End anonymous namespace.
-namespace {
-/// \brief A cache of SCEV results used to optimize repeated queries to SCEV on
-/// the same set of instructions.
-/// The primary cost this saves is the cost of checking the validity of a SCEV
-/// every time it is looked up. However, in some cases we can provide a reduced
-/// and especially useful model for an instruction based upon SCEV that is
-/// non-trivial to compute but more useful to clients.
-class SCEVCache {
- /// \brief Struct to represent a GEP whose start and step are known fixed
- /// offsets from a base address due to SCEV's analysis.
- struct GEPDescriptor {
- Value *BaseAddr = nullptr;
- unsigned Start = 0;
- unsigned Step = 0;
- };
- Optional<GEPDescriptor> getGEPDescriptor(GetElementPtrInst *GEP);
- SCEVCache(const Loop &L, ScalarEvolution &SE) : L(L), SE(SE) {}
- const Loop &L;
- ScalarEvolution &SE;
- SmallDenseMap<GetElementPtrInst *, GEPDescriptor> GEPDescriptors;
-} // End anonymous namespace.
-/// \brief Get a simplified descriptor for a GEP instruction.
-/// Where possible, this produces a simplified descriptor for a GEP instruction
-/// using SCEV analysis of the containing loop. If this isn't possible, it
-/// returns an empty optional.
-/// The model is a base address, an initial offset, and a per-iteration step.
-/// This fits very common patterns of GEPs inside loops and is something we can
-/// use to simulate the behavior of a particular iteration of a loop.
-/// This is a cached interface. The first call may do non-trivial work to
-/// compute the result, but all subsequent calls will return a fast answer
-/// based on a cached result. This includes caching negative results.
-SCEVCache::getGEPDescriptor(GetElementPtrInst *GEP) {
- decltype(GEPDescriptors)::iterator It;
- bool Inserted;
- std::tie(It, Inserted) = GEPDescriptors.insert({GEP, {}});
- if (!Inserted) {
- if (!It->second.BaseAddr)
- return None;
- return It->second;
- }
- // We've inserted a new record into the cache, so compute the GEP descriptor
- // if possible.
- Value *V = cast<Value>(GEP);
- if (!SE.isSCEVable(V->getType()))
- return None;
- const SCEV *S = SE.getSCEV(V);
- // FIXME: It'd be nice if the worklist and set used by the
- // SCEVTraversal could be re-used between loop iterations, but the
- // interface doesn't support that. There is no way to clear the visited
- // sets between uses.
- FindConstantPointers Visitor(&L, SE);
- SCEVTraversal<FindConstantPointers> T(Visitor);
- // Try to find (BaseAddress+Step+Offset) tuple.
- // If succeeded, save it to the cache - it might help in folding
- // loads.
- T.visitAll(S);
- if (!Visitor.IndexIsConstant || !Visitor.BaseAddress)
- return None;
- const SCEV *BaseAddrSE = SE.getSCEV(Visitor.BaseAddress);
- if (BaseAddrSE->getType() != S->getType())
- return None;
- const SCEV *OffSE = SE.getMinusSCEV(S, BaseAddrSE);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OffSE);
- if (!AR)
- return None;
- const SCEVConstant *StepSE =
- dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE));
- const SCEVConstant *StartSE = dyn_cast<SCEVConstant>(AR->getStart());
- if (!StepSE || !StartSE)
- return None;
- // Check and skip caching if doing so would require lots of bits to
- // avoid overflow.
- APInt Start = StartSE->getValue()->getValue();
- APInt Step = StepSE->getValue()->getValue();
- if (Start.getActiveBits() > 32 || Step.getActiveBits() > 32)
- return None;
- // We found a cacheable SCEV model for the GEP.
- It->second.BaseAddr = Visitor.BaseAddress;
- It->second.Start = Start.getLimitedValue();
- It->second.Step = Step.getLimitedValue();
- return It->second;
-namespace {
// This class is used to get an estimate of the optimization effects that we
// could get from complete loop unrolling. It comes from the fact that some
// loads might be replaced with concrete constant values and that could trigger
@@ -446,17 +270,31 @@ namespace {
class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> {
typedef InstVisitor<UnrolledInstAnalyzer, bool> Base;
friend class InstVisitor<UnrolledInstAnalyzer, bool>;
+ struct SimplifiedAddress {
+ Value *Base = nullptr;
+ ConstantInt *Offset = nullptr;
+ };
UnrolledInstAnalyzer(unsigned Iteration,
DenseMap<Value *, Constant *> &SimplifiedValues,
- SCEVCache &SC)
- : Iteration(Iteration), SimplifiedValues(SimplifiedValues), SC(SC) {}
+ const Loop *L, ScalarEvolution &SE)
+ : Iteration(Iteration), SimplifiedValues(SimplifiedValues), L(L), SE(SE) {
+ IterationNumber = SE.getConstant(APInt(64, Iteration));
+ }
// Allow access to the initial visit method.
using Base::visit;
+ /// \brief A cache of pointer bases and constant-folded offsets corresponding
+ /// to GEP (or derived from GEP) instructions.
+ ///
+ /// In order to find the base pointer one needs to perform non-trivial
+ /// traversal of the corresponding SCEV expression, so it's good to have the
+ /// results saved.
+ DenseMap<Value *, SimplifiedAddress> SimplifiedAddresses;
/// \brief Number of currently simulated iteration.
/// If an expression is ConstAddress+Constant, then the Constant is
@@ -464,18 +302,71 @@ private:
/// SCEVGEPCache.
unsigned Iteration;
- // While we walk the loop instructions, we we build up and maintain a mapping
- // of simplified values specific to this iteration. The idea is to propagate
- // any special information we have about loads that can be replaced with
- // constants after complete unrolling, and account for likely simplifications
- // post-unrolling.
+ /// \brief SCEV expression corresponding to number of currently simulated
+ /// iteration.
+ const SCEV *IterationNumber;
+ /// \brief A Value->Constant map for keeping values that we managed to
+ /// constant-fold on the given iteration.
+ ///
+ /// While we walk the loop instructions, we build up and maintain a mapping
+ /// of simplified values specific to this iteration. The idea is to propagate
+ /// any special information we have about loads that can be replaced with
+ /// constants after complete unrolling, and account for likely simplifications
+ /// post-unrolling.
DenseMap<Value *, Constant *> &SimplifiedValues;
- // We use a cache to wrap all our SCEV queries.
- SCEVCache &SC;
+ const Loop *L;
+ ScalarEvolution &SE;
+ /// \brief Try to simplify instruction \p I using its SCEV expression.
+ ///
+ /// The idea is that some AddRec expressions become constants, which then
+ /// could trigger folding of other instructions. However, that only happens
+ /// for expressions whose start value is also constant, which isn't always the
+ /// case. In another common and important case the start value is just some
+ /// address (i.e. SCEVUnknown) - in this case we compute the offset and save
+ /// it along with the base address instead.
+ bool simplifyInstWithSCEV(Instruction *I) {
+ if (!SE.isSCEVable(I->getType()))
+ return false;
+ const SCEV *S = SE.getSCEV(I);
+ if (auto *SC = dyn_cast<SCEVConstant>(S)) {
+ SimplifiedValues[I] = SC->getValue();
+ return true;
+ }
+ auto *AR = dyn_cast<SCEVAddRecExpr>(S);
+ if (!AR)
+ return false;
+ const SCEV *ValueAtIteration = AR->evaluateAtIteration(IterationNumber, SE);
+ // Check if the AddRec expression becomes a constant.
+ if (auto *SC = dyn_cast<SCEVConstant>(ValueAtIteration)) {
+ SimplifiedValues[I] = SC->getValue();
+ return true;
+ }
+ // Check if the offset from the base address becomes a constant.
+ auto *Base = dyn_cast<SCEVUnknown>(SE.getPointerBase(S));
+ if (!Base)
+ return false;
+ auto *Offset =
+ dyn_cast<SCEVConstant>(SE.getMinusSCEV(ValueAtIteration, Base));
+ if (!Offset)
+ return false;
+ SimplifiedAddress Address;
+ Address.Base = Base->getValue();
+ Address.Offset = Offset->getValue();
+ SimplifiedAddresses[I] = Address;
+ return true;
+ }
/// Base case for the instruction visitor.
- bool visitInstruction(Instruction &I) { return false; };
+ bool visitInstruction(Instruction &I) {
+ return simplifyInstWithSCEV(&I);
+ }
/// TODO: Add visitors for other instruction types, e.g. ZExt, SExt.
@@ -492,6 +383,7 @@ private:
if (!isa<Constant>(RHS))
if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
RHS = SimpleRHS;
Value *SimpleV = nullptr;
const DataLayout &DL = I.getModule()->getDataLayout();
if (auto FI = dyn_cast<FPMathOperator>(&I))
@@ -503,24 +395,21 @@ private:
if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
SimplifiedValues[&I] = C;
- return SimpleV;
+ if (SimpleV)
+ return true;
+ return Base::visitBinaryOperator(I);
/// Try to fold load I.
bool visitLoad(LoadInst &I) {
Value *AddrOp = I.getPointerOperand();
- if (!isa<Constant>(AddrOp))
- if (Constant *SimplifiedAddrOp = SimplifiedValues.lookup(AddrOp))
- AddrOp = SimplifiedAddrOp;
- auto *GEP = dyn_cast<GetElementPtrInst>(AddrOp);
- if (!GEP)
- return false;
- auto OptionalGEPDesc = SC.getGEPDescriptor(GEP);
- if (!OptionalGEPDesc)
+ auto AddressIt = SimplifiedAddresses.find(AddrOp);
+ if (AddressIt == SimplifiedAddresses.end())
return false;
+ ConstantInt *SimplifiedAddrOp = AddressIt->second.Offset;
- auto GV = dyn_cast<GlobalVariable>(OptionalGEPDesc->BaseAddr);
+ auto *GV = dyn_cast<GlobalVariable>(AddressIt->second.Base);
// We're only interested in loads that can be completely folded to a
// constant.
if (!GV || !GV->hasInitializer())
@@ -531,13 +420,10 @@ private:
if (!CDS)
return false;
- // This calculation should never overflow because we bound Iteration quite
- // low and both the start and step are 32-bit integers. We use signed
- // integers so that UBSan will catch if a bug sneaks into the code.
int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
- int64_t Index = ((int64_t)OptionalGEPDesc->Start +
- (int64_t)OptionalGEPDesc->Step * (int64_t)Iteration) /
- ElemSize;
+ assert(SimplifiedAddrOp->getValue().getActiveBits() < 64 &&
+ "Unexpectedly large index value.");
+ int64_t Index = SimplifiedAddrOp->getSExtValue() / ElemSize;
if (Index >= CDS->getNumElements()) {
// FIXME: For now we conservatively ignore out of bound accesses, but
// we're allowed to perform the optimization in this case.
@@ -556,11 +442,12 @@ private:
namespace {
struct EstimatedUnrollCost {
- /// \brief Count the number of optimized instructions.
- unsigned NumberOfOptimizedInstructions;
+ /// \brief The estimated cost after unrolling.
+ unsigned UnrolledCost;
- /// \brief Count the total number of instructions.
- unsigned UnrolledLoopSize;
+ /// \brief The estimated dynamic cost of executing the instructions in the
+ /// rolled form.
+ unsigned RolledDynamicCost;
@@ -593,12 +480,15 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
SmallSetVector<BasicBlock *, 16> BBWorklist;
DenseMap<Value *, Constant *> SimplifiedValues;
- // Use a cache to access SCEV expressions so that we don't pay the cost on
- // each iteration. This cache is lazily self-populating.
- SCEVCache SC(*L, SE);
- unsigned NumberOfOptimizedInstructions = 0;
- unsigned UnrolledLoopSize = 0;
+ // The estimated cost of the unrolled form of the loop. We try to estimate
+ // this by simplifying as much as we can while computing the estimate.
+ unsigned UnrolledCost = 0;
+ // We also track the estimated dynamic (that is, actually executed) cost in
+ // the rolled form. This helps identify cases when the savings from unrolling
+ // aren't just exposing dead control flows, but actual reduced dynamic
+ // instructions due to the simplifications which we expect to occur after
+ // unrolling.
+ unsigned RolledDynamicCost = 0;
// Simulate execution of each iteration of the loop counting instructions,
// which would be simplified.
@@ -606,7 +496,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
// we literally have to go through all loop's iterations.
for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
- UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SC);
+ UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, L, SE);
@@ -618,17 +508,20 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
// it. We don't change the actual IR, just count optimization
// opportunities.
for (Instruction &I : *BB) {
- UnrolledLoopSize += TTI.getUserCost(&I);
+ unsigned InstCost = TTI.getUserCost(&I);
// Visit the instruction to analyze its loop cost after unrolling,
- // and if the visitor returns true, then we can optimize this
- // instruction away.
- if (Analyzer.visit(I))
- NumberOfOptimizedInstructions += TTI.getUserCost(&I);
+ // and if the visitor returns false, include this instruction in the
+ // unrolled cost.
+ if (!Analyzer.visit(I))
+ UnrolledCost += InstCost;
+ // Also track this instruction's expected cost when executing the rolled
+ // loop form.
+ RolledDynamicCost += InstCost;
// If unrolled body turns out to be too big, bail out.
- if (UnrolledLoopSize - NumberOfOptimizedInstructions >
- MaxUnrolledLoopSize)
+ if (UnrolledCost > MaxUnrolledLoopSize)
return None;
@@ -640,10 +533,10 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
// If we found no optimization opportunities on the first iteration, we
// won't find them on later ones either.
- if (!NumberOfOptimizedInstructions)
+ if (UnrolledCost == RolledDynamicCost)
return None;
- return {{NumberOfOptimizedInstructions, UnrolledLoopSize}};
+ return {{UnrolledCost, RolledDynamicCost}};
/// ApproximateLoopSize - Approximate the size of the loop.
@@ -749,46 +642,56 @@ static void SetLoopAlreadyUnrolled(Loop *L) {
-bool LoopUnroll::canUnrollCompletely(
- Loop *L, unsigned Threshold, unsigned AbsoluteThreshold,
- uint64_t UnrolledSize, unsigned NumberOfOptimizedInstructions,
- unsigned PercentOfOptimizedForCompleteUnroll) {
+bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold,
+ unsigned PercentDynamicCostSavedThreshold,
+ unsigned DynamicCostSavingsDiscount,
+ uint64_t UnrolledCost,
+ uint64_t RolledDynamicCost) {
if (Threshold == NoThreshold) {
DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n");
return true;
- if (UnrolledSize <= Threshold) {
- DEBUG(dbgs() << " Can fully unroll, because unrolled size: "
- << UnrolledSize << "<" << Threshold << "\n");
+ if (UnrolledCost <= Threshold) {
+ DEBUG(dbgs() << " Can fully unroll, because unrolled cost: "
+ << UnrolledCost << "<" << Threshold << "\n");
return true;
- assert(UnrolledSize && "UnrolledSize can't be 0 at this point.");
- unsigned PercentOfOptimizedInstructions =
- (uint64_t)NumberOfOptimizedInstructions * 100ull / UnrolledSize;
- if (UnrolledSize <= AbsoluteThreshold &&
- PercentOfOptimizedInstructions >= PercentOfOptimizedForCompleteUnroll) {
- DEBUG(dbgs() << " Can fully unroll, because unrolling will help removing "
- << PercentOfOptimizedInstructions
- << "% instructions (threshold: "
- << PercentOfOptimizedForCompleteUnroll << "%)\n");
- DEBUG(dbgs() << " Unrolled size (" << UnrolledSize
- << ") is less than the threshold (" << AbsoluteThreshold
- << ").\n");
+ assert(UnrolledCost && "UnrolledCost can't be 0 at this point.");
+ assert(RolledDynamicCost >= UnrolledCost &&
+ "Cannot have a higher unrolled cost than a rolled cost!");
+ // Compute the percentage of the dynamic cost in the rolled form that is
+ // saved when unrolled. If unrolling dramatically reduces the estimated
+ // dynamic cost of the loop, we use a higher threshold to allow more
+ // unrolling.
+ unsigned PercentDynamicCostSaved =
+ (uint64_t)(RolledDynamicCost - UnrolledCost) * 100ull / RolledDynamicCost;
+ if (PercentDynamicCostSaved >= PercentDynamicCostSavedThreshold &&
+ (int64_t)UnrolledCost - (int64_t)DynamicCostSavingsDiscount <=
+ (int64_t)Threshold) {
+ DEBUG(dbgs() << " Can fully unroll, because unrolling will reduce the "
+ "expected dynamic cost by " << PercentDynamicCostSaved
+ << "% (threshold: " << PercentDynamicCostSavedThreshold
+ << "%)\n"
+ << " and the unrolled cost (" << UnrolledCost
+ << ") is less than the max threshold ("
+ << DynamicCostSavingsDiscount << ").\n");
return true;
DEBUG(dbgs() << " Too large to fully unroll:\n");
- DEBUG(dbgs() << " Unrolled size: " << UnrolledSize << "\n");
- DEBUG(dbgs() << " Estimated number of optimized instructions: "
- << NumberOfOptimizedInstructions << "\n");
- DEBUG(dbgs() << " Absolute threshold: " << AbsoluteThreshold << "\n");
- DEBUG(dbgs() << " Minimum percent of removed instructions: "
- << PercentOfOptimizedForCompleteUnroll << "\n");
- DEBUG(dbgs() << " Threshold for small loops: " << Threshold << "\n");
+ DEBUG(dbgs() << " Threshold: " << Threshold << "\n");
+ DEBUG(dbgs() << " Max threshold: " << DynamicCostSavingsDiscount << "\n");
+ DEBUG(dbgs() << " Percent cost saved threshold: "
+ << PercentDynamicCostSavedThreshold << "%\n");
+ DEBUG(dbgs() << " Unrolled cost: " << UnrolledCost << "\n");
+ DEBUG(dbgs() << " Rolled dynamic cost: " << RolledDynamicCost << "\n");
+ DEBUG(dbgs() << " Percent cost saved: " << PercentDynamicCostSaved
+ << "\n");
return false;
@@ -899,9 +802,11 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
unsigned Threshold, PartialThreshold;
- unsigned AbsoluteThreshold, PercentOfOptimizedForCompleteUnroll;
+ unsigned PercentDynamicCostSavedThreshold;
+ unsigned DynamicCostSavingsDiscount;
selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold,
- AbsoluteThreshold, PercentOfOptimizedForCompleteUnroll);
+ PercentDynamicCostSavedThreshold,
+ DynamicCostSavingsDiscount);
// Given Count, TripCount and thresholds determine the type of
// unrolling which is to be performed.
@@ -910,20 +815,18 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (TripCount && Count == TripCount) {
Unrolling = Partial;
// If the loop is really small, we don't need to run an expensive analysis.
- if (canUnrollCompletely(
- L, Threshold, AbsoluteThreshold,
- UnrolledSize, 0, 100)) {
+ if (canUnrollCompletely(L, Threshold, 100, DynamicCostSavingsDiscount,
+ UnrolledSize, UnrolledSize)) {
Unrolling = Full;
} else {
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost =
- analyzeLoopUnrollCost(L, TripCount, *SE, TTI, AbsoluteThreshold))
- if (canUnrollCompletely(L, Threshold, AbsoluteThreshold,
- Cost->UnrolledLoopSize,
- Cost->NumberOfOptimizedInstructions,
- PercentOfOptimizedForCompleteUnroll)) {
+ if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
+ L, TripCount, *SE, TTI, Threshold + DynamicCostSavingsDiscount))
+ if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold,
+ DynamicCostSavingsDiscount, Cost->UnrolledCost,
+ Cost->RolledDynamicCost)) {
Unrolling = Full;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 66d6ac6..2bdf670 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -510,7 +510,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
+ AliasAnalysis::Location StoreLoc = MemoryLocation::get(SI);
for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
E = C; I != E; --I) {
if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
@@ -802,9 +802,8 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
// NOTE: This is conservative, it will stop on any read from the source loc,
// not just the defining memcpy.
- MemDepResult SourceDep =
- MD->getPointerDependencyFrom(AA.getLocationForSource(MDep),
- false, M, M->getParent());
+ MemDepResult SourceDep = MD->getPointerDependencyFrom(
+ MemoryLocation::getForSource(MDep), false, M, M->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
@@ -812,7 +811,8 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
// source and dest might overlap. We still want to eliminate the intermediate
// value, but we have to generate a memmove instead of memcpy.
bool UseMemMove = false;
- if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
+ if (!AA.isNoAlias(MemoryLocation::getForDest(M),
+ MemoryLocation::getForSource(MDep)))
UseMemMove = true;
// If all checks passed, then we can transform M.
@@ -860,9 +860,8 @@ bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
return false;
// Check that there are no other dependencies on the memset destination.
- MemDepResult DstDepInfo =
- MD->getPointerDependencyFrom(AliasAnalysis::getLocationForDest(MemSet),
- false, MemCpy, MemCpy->getParent());
+ MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
+ MemoryLocation::getForDest(MemSet), false, MemCpy, MemCpy->getParent());
if (DstDepInfo.getInst() != MemSet)
return false;
@@ -998,7 +997,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
- AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
+ AliasAnalysis::Location SrcLoc = MemoryLocation::getForSource(M);
MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
M, M->getParent());
@@ -1047,7 +1046,8 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
return false;
// See if the pointers alias.
- if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
+ if (!AA.isNoAlias(MemoryLocation::getForDest(M),
+ MemoryLocation::getForSource(M)))
return false;
DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
@@ -1121,8 +1121,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
// NOTE: This is conservative, it will stop on any read from the source loc,
// not just the defining memcpy.
MemDepResult SourceDep =
- MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep),
- false, CS.getInstruction(), MDep->getParent());
+ MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
+ CS.getInstruction(), MDep->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 611a941..776dfb4 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -241,7 +241,7 @@ bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start,
const Instruction& End,
LoadInst* LI) {
- AliasAnalysis::Location Loc = AA->getLocation(LI);
+ AliasAnalysis::Location Loc = MemoryLocation::get(LI);
return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Mod);
@@ -266,8 +266,8 @@ LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
LoadInst *Load1 = dyn_cast<LoadInst>(Inst);
BasicBlock *BB0 = Load0->getParent();
- AliasAnalysis::Location Loc0 = AA->getLocation(Load0);
- AliasAnalysis::Location Loc1 = AA->getLocation(Load1);
+ AliasAnalysis::Location Loc0 = MemoryLocation::get(Load0);
+ AliasAnalysis::Location Loc1 = MemoryLocation::get(Load1);
if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) &&
!isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) &&
!isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) {
@@ -425,8 +425,8 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
StoreInst *Store1 = cast<StoreInst>(Inst);
- AliasAnalysis::Location Loc0 = AA->getLocation(Store0);
- AliasAnalysis::Location Loc1 = AA->getLocation(Store1);
+ AliasAnalysis::Location Loc0 = MemoryLocation::get(Store0);
+ AliasAnalysis::Location Loc1 = MemoryLocation::get(Store1);
if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
BB1->back(), Loc1) &&
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
index 5b370e0..4cf68b0 100644
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -234,6 +234,7 @@ bool NaryReassociate::doOneIteration(Function &F) {
BasicBlock *BB = Node->getBlock();
for (auto I = BB->begin(); I != BB->end(); ++I) {
if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(I)) {
+ const SCEV *OldSCEV = SE->getSCEV(I);
if (Instruction *NewI = tryReassociate(I)) {
Changed = true;
@@ -243,7 +244,28 @@ bool NaryReassociate::doOneIteration(Function &F) {
// Add the rewritten instruction to SeenExprs; the original instruction
// is deleted.
- SeenExprs[SE->getSCEV(I)].push_back(I);
+ const SCEV *NewSCEV = SE->getSCEV(I);
+ SeenExprs[NewSCEV].push_back(I);
+ // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
+ // is equivalent to I. However, ScalarEvolution::getSCEV may
+ // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose
+ // we reassociate
+ // I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4
+ // to
+ // NewI = &a[sext(i)] + sext(j).
+ //
+ // ScalarEvolution computes
+ // getSCEV(I) = a + 4 * sext(i + j)
+ // getSCEV(NewI) = a + 4 * sext(i) + 4 * sext(j)
+ // which are different SCEVs.
+ //
+ // To alleviate this issue of ScalarEvolution not always capturing
+ // equivalence, we add I to SeenExprs[OldSCEV] as well so that we can
+ // map the SCEVs from both before and after tryReassociate(I) to I.
+ //
+ // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll.
+ if (NewSCEV != OldSCEV)
+ SeenExprs[OldSCEV].push_back(I);
@@ -295,8 +317,10 @@ static bool isGEPFoldable(GetElementPtrInst *GEP,
BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field);
+ unsigned AddrSpace = GEP->getPointerAddressSpace();
return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV,
- BaseOffset, HasBaseReg, Scale);
+ BaseOffset, HasBaseReg, Scale, AddrSpace);
Instruction *NaryReassociate::tryReassociateGEP(GetElementPtrInst *GEP) {
diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 3e7deeb..9ecaf10 100644
--- a/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -496,7 +496,7 @@ template <typename T> static void unique_unsorted(std::vector<T> &vec) {
-static std::string GCSafepointPollName("gc.safepoint_poll");
+static const char *const GCSafepointPollName = "gc.safepoint_poll";
static bool isGCSafepointPoll(Function &F) {
return F.getName().equals(GCSafepointPollName);
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index b677523..6c66b58 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -733,7 +733,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
if (Ops.empty()) {
Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
assert(Identity && "Associative operation without identity!");
- Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1)));
+ Ops.emplace_back(Identity, APInt(Bitwidth, 1));
return Changed;
@@ -1966,38 +1966,35 @@ Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) {
if (!I->hasOneUse() || I->getType()->isVectorTy())
return nullptr;
- // Must be a mul, fmul, or fdiv instruction.
+ // Must be a fmul or fdiv instruction.
unsigned Opcode = I->getOpcode();
- if (Opcode != Instruction::Mul && Opcode != Instruction::FMul &&
- Opcode != Instruction::FDiv)
+ if (Opcode != Instruction::FMul && Opcode != Instruction::FDiv)
return nullptr;
- // Must have at least one constant operand.
- Constant *C0 = dyn_cast<Constant>(I->getOperand(0));
- Constant *C1 = dyn_cast<Constant>(I->getOperand(1));
- if (!C0 && !C1)
+ auto *C0 = dyn_cast<ConstantFP>(I->getOperand(0));
+ auto *C1 = dyn_cast<ConstantFP>(I->getOperand(1));
+ // Both operands are constant, let it get constant folded away.
+ if (C0 && C1)
return nullptr;
- // Must be a negative ConstantInt or ConstantFP.
- Constant *C = C0 ? C0 : C1;
- unsigned ConstIdx = C0 ? 0 : 1;
- if (auto *CI = dyn_cast<ConstantInt>(C)) {
- if (!CI->isNegative() || CI->isMinValue(true))
- return nullptr;
- } else if (auto *CF = dyn_cast<ConstantFP>(C)) {
- if (!CF->isNegative())
- return nullptr;
- } else
+ ConstantFP *CF = C0 ? C0 : C1;
+ // Must have one constant operand.
+ if (!CF)
+ return nullptr;
+ // Must be a negative ConstantFP.
+ if (!CF->isNegative())
return nullptr;
// User must be a binary operator with one or more uses.
Instruction *User = I->user_back();
- if (!isa<BinaryOperator>(User) || !User->getNumUses())
+ if (!isa<BinaryOperator>(User) || !User->hasNUsesOrMore(1))
return nullptr;
unsigned UserOpcode = User->getOpcode();
- if (UserOpcode != Instruction::Add && UserOpcode != Instruction::FAdd &&
- UserOpcode != Instruction::Sub && UserOpcode != Instruction::FSub)
+ if (UserOpcode != Instruction::FAdd && UserOpcode != Instruction::FSub)
return nullptr;
// Subtraction is not commutative. Explicitly, the following transform is
@@ -2006,14 +2003,9 @@ Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) {
return nullptr;
// Change the sign of the constant.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
- I->setOperand(ConstIdx, ConstantInt::get(CI->getContext(), -CI->getValue()));
- else {
- ConstantFP *CF = cast<ConstantFP>(C);
- APFloat Val = CF->getValueAPF();
- Val.changeSign();
- I->setOperand(ConstIdx, ConstantFP::get(CF->getContext(), Val));
- }
+ APFloat Val = CF->getValueAPF();
+ Val.changeSign();
+ I->setOperand(C0 ? 0 : 1, ConstantFP::get(CF->getContext(), Val));
// Canonicalize I to RHS to simplify the next bit of logic. E.g.,
// ((-Const*y) + x) -> (x + (-Const*y)).
@@ -2023,15 +2015,9 @@ Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) {
Value *Op0 = User->getOperand(0);
Value *Op1 = User->getOperand(1);
BinaryOperator *NI;
- switch(UserOpcode) {
+ switch (UserOpcode) {
llvm_unreachable("Unexpected Opcode!");
- case Instruction::Add:
- NI = BinaryOperator::CreateSub(Op0, Op1);
- break;
- case Instruction::Sub:
- NI = BinaryOperator::CreateAdd(Op0, Op1);
- break;
case Instruction::FAdd:
NI = BinaryOperator::CreateFSub(Op0, Op1);
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 6cf765a..6f6ba72 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
@@ -74,13 +75,27 @@ static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
namespace {
-struct RewriteStatepointsForGC : public FunctionPass {
+struct RewriteStatepointsForGC : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- RewriteStatepointsForGC() : FunctionPass(ID) {
+ RewriteStatepointsForGC() : ModulePass(ID) {
- bool runOnFunction(Function &F) override;
+ bool runOnFunction(Function &F);
+ bool runOnModule(Module &M) override {
+ bool Changed = false;
+ for (Function &F : M)
+ Changed |= runOnFunction(F);
+ if (Changed) {
+ // stripDereferenceabilityInfo asserts that shouldRewriteStatepointsIn
+ // returns true for at least one function in the module. Since at least
+ // one function changed, we know that the precondition is satisfied.
+ stripDereferenceabilityInfo(M);
+ }
+ return Changed;
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override {
// We add and rewrite a bunch of instructions, but don't really do much
@@ -88,12 +103,26 @@ struct RewriteStatepointsForGC : public FunctionPass {
+ /// The IR fed into RewriteStatepointsForGC may have had attributes implying
+ /// dereferenceability that are no longer valid/correct after
+ /// RewriteStatepointsForGC has run. This is because semantically, after
+ /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
+ /// heap. stripDereferenceabilityInfo (conservatively) restores correctness
+ /// by erasing all attributes in the module that externally imply
+ /// dereferenceability.
+ ///
+ void stripDereferenceabilityInfo(Module &M);
+ // Helpers for stripDereferenceabilityInfo
+ void stripDereferenceabilityInfoFromBody(Function &F);
+ void stripDereferenceabilityInfoFromPrototype(Function &F);
} // namespace
char RewriteStatepointsForGC::ID = 0;
-FunctionPass *llvm::createRewriteStatepointsForGCPass() {
+ModulePass *llvm::createRewriteStatepointsForGCPass() {
return new RewriteStatepointsForGC();
@@ -1031,14 +1060,11 @@ static void recomputeLiveInValues(
// goes through the statepoint. We might need to split an edge to make this
// possible.
static BasicBlock *
-normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, Pass *P) {
- DominatorTree *DT = nullptr;
- if (auto *DTP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>())
- DT = &DTP->getDomTree();
+normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
+ DominatorTree &DT) {
BasicBlock *Ret = BB;
if (!BB->getUniquePredecessor()) {
- Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, DT);
+ Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, &DT);
// Now that 'ret' has unique predecessor we can safely remove all phi nodes
@@ -2016,9 +2042,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction());
normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(),
- P);
+ DT);
normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(),
- P);
+ DT);
// A list of dummy calls added to the IR to keep various values obviously
@@ -2197,6 +2223,72 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
return !records.empty();
+// Handles both return values and arguments for Functions and CallSites.
+template <typename AttrHolder>
+static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
+ unsigned Index) {
+ AttrBuilder R;
+ if (AH.getDereferenceableBytes(Index))
+ R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
+ AH.getDereferenceableBytes(Index)));
+ if (AH.getDereferenceableOrNullBytes(Index))
+ R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
+ AH.getDereferenceableOrNullBytes(Index)));
+ if (!R.empty())
+ AH.setAttributes(AH.getAttributes().removeAttributes(
+ Ctx, Index, AttributeSet::get(Ctx, Index, R)));
+RewriteStatepointsForGC::stripDereferenceabilityInfoFromPrototype(Function &F) {
+ LLVMContext &Ctx = F.getContext();
+ for (Argument &A : F.args())
+ if (isa<PointerType>(A.getType()))
+ RemoveDerefAttrAtIndex(Ctx, F, A.getArgNo() + 1);
+ if (isa<PointerType>(F.getReturnType()))
+ RemoveDerefAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
+void RewriteStatepointsForGC::stripDereferenceabilityInfoFromBody(Function &F) {
+ if (F.empty())
+ return;
+ LLVMContext &Ctx = F.getContext();
+ MDBuilder Builder(Ctx);
+ for (Instruction &I : inst_range(F)) {
+ if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) {
+ assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!");
+ bool IsImmutableTBAA =
+ MD->getNumOperands() == 4 &&
+ mdconst::extract<ConstantInt>(MD->getOperand(3))->getValue() == 1;
+ if (!IsImmutableTBAA)
+ continue; // no work to do, MD_tbaa is already marked mutable
+ MDNode *Base = cast<MDNode>(MD->getOperand(0));
+ MDNode *Access = cast<MDNode>(MD->getOperand(1));
+ uint64_t Offset =
+ mdconst::extract<ConstantInt>(MD->getOperand(2))->getZExtValue();
+ MDNode *MutableTBAA =
+ Builder.createTBAAStructTagNode(Base, Access, Offset);
+ I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
+ }
+ if (CallSite CS = CallSite(&I)) {
+ for (int i = 0, e = CS.arg_size(); i != e; i++)
+ if (isa<PointerType>(CS.getArgument(i)->getType()))
+ RemoveDerefAttrAtIndex(Ctx, CS, i + 1);
+ if (isa<PointerType>(CS.getType()))
+ RemoveDerefAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
+ }
+ }
/// Returns true if this function should be rewritten by this pass. The main
/// point of this function is as an extension point for custom logic.
static bool shouldRewriteStatepointsIn(Function &F) {
@@ -2211,6 +2303,19 @@ static bool shouldRewriteStatepointsIn(Function &F) {
return false;
+void RewriteStatepointsForGC::stripDereferenceabilityInfo(Module &M) {
+#ifndef NDEBUG
+ assert(std::any_of(M.begin(), M.end(), shouldRewriteStatepointsIn) &&
+ "precondition!");
+ for (Function &F : M)
+ stripDereferenceabilityInfoFromPrototype(F);
+ for (Function &F : M)
+ stripDereferenceabilityInfoFromBody(F);
bool RewriteStatepointsForGC::runOnFunction(Function &F) {
// Nothing to do for declarations.
if (F.isDeclaration() || F.empty())
@@ -2221,7 +2326,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {
if (!shouldRewriteStatepointsIn(F))
return false;
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
// Gather all the statepoints which need rewritten. Be careful to only
// consider those in reachable code since we need to ask dominance queries
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 3a782d1..4a87531 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -852,9 +852,11 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
TargetTransformInfo &TTI =
+ unsigned AddrSpace = GEP->getPointerAddressSpace();
if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
/*BaseGV=*/nullptr, AccumulativeByteOffset,
- /*HasBaseReg=*/true, /*Scale=*/0)) {
+ /*HasBaseReg=*/true, /*Scale=*/0,
+ AddrSpace)) {
return Changed;
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 8566cd9..f0e3ffd 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -193,11 +193,18 @@ namespace {
struct CFGSimplifyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
unsigned BonusInstThreshold;
- CFGSimplifyPass(int T = -1) : FunctionPass(ID) {
+ std::function<bool(const Function &)> PredicateFtor;
+ CFGSimplifyPass(int T = -1,
+ std::function<bool(const Function &)> Ftor = nullptr)
+ : FunctionPass(ID), PredicateFtor(Ftor) {
BonusInstThreshold = (T == -1) ? UserBonusInstThreshold : unsigned(T);
bool runOnFunction(Function &F) override {
+ if (PredicateFtor && !PredicateFtor(F))
+ return false;
if (skipOptnoneFunction(F))
return false;
@@ -224,7 +231,9 @@ INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
// Public interface to the CFGSimplification pass
-FunctionPass *llvm::createCFGSimplificationPass(int Threshold) {
- return new CFGSimplifyPass(Threshold);
+FunctionPass *
+llvm::createCFGSimplificationPass(int Threshold,
+ std::function<bool(const Function &)> Ftor) {
+ return new CFGSimplifyPass(Threshold, Ftor);
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index b169d56..078c6a9 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -163,7 +163,7 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
- AliasAnalysis::Location Loc = AA->getLocation(L);
+ AliasAnalysis::Location Loc = MemoryLocation::get(L);
for (Instruction *S : Stores)
if (AA->getModRefInfo(S, Loc) & AliasAnalysis::Mod)
return false;
@@ -172,6 +172,12 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst))
return false;
+ // Convergent operations can only be moved to control equivalent blocks.
+ if (auto CS = CallSite(Inst)) {
+ if (CS.hasFnAttr(Attribute::Convergent))
+ return false;
+ }
return true;
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index a5890c0..5f25e6b 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -491,6 +491,9 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
const DataLayout &DL = Phi->getModule()->getDataLayout();
int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
+ if (!Size)
+ return false;
int64_t CVSize = CV->getSExtValue();
if (CVSize % Size)
return false;
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 623dbc9..a87f850 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -636,7 +636,7 @@ void PromoteMem2Reg::run() {
// and inserting the phi nodes we marked as necessary
std::vector<RenamePassData> RenamePassWorkList;
- RenamePassWorkList.push_back(RenamePassData(F.begin(), nullptr, Values));
+ RenamePassWorkList.emplace_back(F.begin(), nullptr, std::move(Values));
do {
RenamePassData RPD;
@@ -973,7 +973,7 @@ NextIteration:
for (; I != E; ++I)
if (VisitedSuccs.insert(*I).second)
- Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));
+ Worklist.emplace_back(*I, Pred, IncomingVals);
goto NextIteration;
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 3757a80..ab30aa1 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -141,7 +141,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
Changed = true;
if (IVOperand->use_empty())
- DeadInsts.push_back(IVOperand);
+ DeadInsts.emplace_back(IVOperand);
return IVSrc;
@@ -178,7 +178,7 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
Changed = true;
- DeadInsts.push_back(ICmp);
+ DeadInsts.emplace_back(ICmp);
/// SimplifyIVUsers helper for eliminating useless
@@ -229,7 +229,7 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
Changed = true;
- DeadInsts.push_back(Rem);
+ DeadInsts.emplace_back(Rem);
/// Eliminate an operation that consumes a simple IV and has
@@ -260,7 +260,7 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
Changed = true;
- DeadInsts.push_back(UseInst);
+ DeadInsts.emplace_back(UseInst);
return true;
@@ -386,7 +386,7 @@ Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
"Bad add instruction created from overflow intrinsic.");
- DeadInsts.push_back(AddVal);
+ DeadInsts.emplace_back(AddVal);
return AddInst;
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index cac80ac..8c72641 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -400,8 +400,11 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
if (auto *AI = dyn_cast<AllocaInst>(I))
- if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ GEP->setResultElementType(
+ TypeMapper->remapType(GEP->getResultElementType()));
+ }
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 011fd0f..95c9381 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -34,6 +34,10 @@
// Variable uniformity checks are inspired by:
// Karrenberg, R. and Hack, S. Whole Function Vectorization.
+// The interleaved access vectorization is based on the paper:
+// Dorit Nuzman, Ira Rosen and Ayal Zaks. Auto-Vectorization of Interleaved
+// Data for SIMD
// Other ideas/concepts are from:
// A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
@@ -134,6 +138,16 @@ static cl::opt<bool> EnableMemAccessVersioning(
"enable-mem-access-versioning", cl::init(true), cl::Hidden,
cl::desc("Enable symblic stride memory access versioning"));
+static cl::opt<bool> EnableInterleavedMemAccesses(
+ "enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
+ cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
+/// Maximum factor for an interleaved memory access.
+static cl::opt<unsigned> MaxInterleaveGroupFactor(
+ "max-interleave-group-factor", cl::Hidden,
+ cl::desc("Maximum factor for an interleaved access group (default = 8)"),
+ cl::init(8));
/// We don't unroll loops with a known constant trip count below this number.
static const unsigned TinyTripCountUnrollThreshold = 128;
@@ -351,6 +365,9 @@ protected:
/// broadcast them into a vector.
VectorParts &getVectorValue(Value *V);
+ /// Try to vectorize the interleaved access group that \p Instr belongs to.
+ void vectorizeInterleaveGroup(Instruction *Instr);
/// Generate a shuffle sequence that will reverse the vector Vec.
virtual Value *reverseVector(Value *Vec);
@@ -545,6 +562,219 @@ static void propagateMetadata(SmallVectorImpl<Value *> &To, const Instruction *F
propagateMetadata(I, From);
+/// \brief The group of interleaved loads/stores sharing the same stride and
+/// close to each other.
+/// Each member in this group has an index starting from 0, and the largest
+/// index should be less than interleaved factor, which is equal to the absolute
+/// value of the access's stride.
+/// E.g. An interleaved load group of factor 4:
+/// for (unsigned i = 0; i < 1024; i+=4) {
+/// a = A[i]; // Member of index 0
+/// b = A[i+1]; // Member of index 1
+/// d = A[i+3]; // Member of index 3
+/// ...
+/// }
+/// An interleaved store group of factor 4:
+/// for (unsigned i = 0; i < 1024; i+=4) {
+/// ...
+/// A[i] = a; // Member of index 0
+/// A[i+1] = b; // Member of index 1
+/// A[i+2] = c; // Member of index 2
+/// A[i+3] = d; // Member of index 3
+/// }
+/// Note: the interleaved load group could have gaps (missing members), but
+/// the interleaved store group doesn't allow gaps.
+class InterleaveGroup {
+ InterleaveGroup(Instruction *Instr, int Stride, unsigned Align)
+ : Align(Align), SmallestKey(0), LargestKey(0), InsertPos(Instr) {
+ assert(Align && "The alignment should be non-zero");
+ Factor = std::abs(Stride);
+ assert(Factor > 1 && "Invalid interleave factor");
+ Reverse = Stride < 0;
+ Members[0] = Instr;
+ }
+ bool isReverse() const { return Reverse; }
+ unsigned getFactor() const { return Factor; }
+ unsigned getAlignment() const { return Align; }
+ unsigned getNumMembers() const { return Members.size(); }
+ /// \brief Try to insert a new member \p Instr with index \p Index and
+ /// alignment \p NewAlign. The index is relative to the leader and it could be
+ /// negative if it is the new leader.
+ ///
+ /// \returns false if the instruction doesn't belong to the group.
+ bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) {
+ assert(NewAlign && "The new member's alignment should be non-zero");
+ int Key = Index + SmallestKey;
+ // Skip if there is already a member with the same index.
+ if (Members.count(Key))
+ return false;
+ if (Key > LargestKey) {
+ // The largest index is always less than the interleave factor.
+ if (Index >= static_cast<int>(Factor))
+ return false;
+ LargestKey = Key;
+ } else if (Key < SmallestKey) {
+ // The largest index is always less than the interleave factor.
+ if (LargestKey - Key >= static_cast<int>(Factor))
+ return false;
+ SmallestKey = Key;
+ }
+ // It's always safe to select the minimum alignment.
+ Align = std::min(Align, NewAlign);
+ Members[Key] = Instr;
+ return true;
+ }
+ /// \brief Get the member with the given index \p Index
+ ///
+ /// \returns nullptr if the group contains no such member.
+ Instruction *getMember(unsigned Index) const {
+ int Key = SmallestKey + Index;
+ if (!Members.count(Key))
+ return nullptr;
+ return Members.find(Key)->second;
+ }
+ /// \brief Get the index for the given member. Unlike the key in the member
+ /// map, the index starts from 0.
+ unsigned getIndex(Instruction *Instr) const {
+ for (auto I : Members)
+ if (I.second == Instr)
+ return I.first - SmallestKey;
+ llvm_unreachable("InterleaveGroup contains no such member");
+ }
+ Instruction *getInsertPos() const { return InsertPos; }
+ void setInsertPos(Instruction *Inst) { InsertPos = Inst; }
+ unsigned Factor; // Interleave Factor.
+ bool Reverse;
+ unsigned Align;
+ DenseMap<int, Instruction *> Members;
+ int SmallestKey;
+ int LargestKey;
+ // To avoid breaking dependences, vectorized instructions of an interleave
+ // group should be inserted at either the first load or the last store in
+ // program order.
+ //
+ // E.g. %even = load i32 // Insert Position
+ // %add = add i32 %even // Use of %even
+ // %odd = load i32
+ //
+ // store i32 %even
+ // %odd = add i32 // Def of %odd
+ // store i32 %odd // Insert Position
+ Instruction *InsertPos;
+/// \brief Drive the analysis of interleaved memory accesses in the loop.
+/// Use this class to analyze interleaved accesses only when we can vectorize
+/// a loop. Otherwise it's meaningless to do analysis as the vectorization
+/// on interleaved accesses is unsafe.
+/// The analysis collects interleave groups and records the relationships
+/// between the member and the group in a map.
+class InterleavedAccessInfo {
+ InterleavedAccessInfo(ScalarEvolution *SE, Loop *L, DominatorTree *DT)
+ : SE(SE), TheLoop(L), DT(DT) {}
+ ~InterleavedAccessInfo() {
+ SmallSet<InterleaveGroup *, 4> DelSet;
+ // Avoid releasing a pointer twice.
+ for (auto &I : InterleaveGroupMap)
+ DelSet.insert(I.second);
+ for (auto *Ptr : DelSet)
+ delete Ptr;
+ }
+ /// \brief Analyze the interleaved accesses and collect them in interleave
+ /// groups. Substitute symbolic strides using \p Strides.
+ void analyzeInterleaving(const ValueToValueMap &Strides);
+ /// \brief Check if \p Instr belongs to any interleave group.
+ bool isInterleaved(Instruction *Instr) const {
+ return InterleaveGroupMap.count(Instr);
+ }
+ /// \brief Get the interleave group that \p Instr belongs to.
+ ///
+ /// \returns nullptr if \p Instr doesn't belong to any group.
+ InterleaveGroup *getInterleaveGroup(Instruction *Instr) const {
+ if (InterleaveGroupMap.count(Instr))
+ return InterleaveGroupMap.find(Instr)->second;
+ return nullptr;
+ }
+ ScalarEvolution *SE;
+ Loop *TheLoop;
+ DominatorTree *DT;
+ /// Holds the relationships between the members and the interleave group.
+ DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;
+ /// \brief The descriptor for a strided memory access.
+ struct StrideDescriptor {
+ StrideDescriptor(int Stride, const SCEV *Scev, unsigned Size,
+ unsigned Align)
+ : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
+ StrideDescriptor() : Stride(0), Scev(nullptr), Size(0), Align(0) {}
+ int Stride; // The access's stride. It is negative for a reverse access.
+ const SCEV *Scev; // The scalar expression of this access
+ unsigned Size; // The size of the memory object.
+ unsigned Align; // The alignment of this access.
+ };
+ /// \brief Create a new interleave group with the given instruction \p Instr,
+ /// stride \p Stride and alignment \p Align.
+ ///
+ /// \returns the newly created interleave group.
+ InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride,
+ unsigned Align) {
+ assert(!InterleaveGroupMap.count(Instr) &&
+ "Already in an interleaved access group");
+ InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align);
+ return InterleaveGroupMap[Instr];
+ }
+ /// \brief Release the group and remove all the relationships.
+ void releaseGroup(InterleaveGroup *Group) {
+ for (unsigned i = 0; i < Group->getFactor(); i++)
+ if (Instruction *Member = Group->getMember(i))
+ InterleaveGroupMap.erase(Member);
+ delete Group;
+ }
+ /// \brief Collect all the accesses with a constant stride in program order.
+ void collectConstStridedAccesses(
+ MapVector<Instruction *, StrideDescriptor> &StrideAccesses,
+ const ValueToValueMap &Strides);
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
/// to what vectorization factor.
/// This class does not look at the profitability of vectorization, only the
@@ -565,8 +795,8 @@ public:
Function *F, const TargetTransformInfo *TTI,
LoopAccessAnalysis *LAA)
: NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F),
- TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), Induction(nullptr),
- WidestIndTy(nullptr), HasFunNoNaNAttr(false) {}
+ TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(SE, L, DT),
+ Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {}
/// This enum represents the kinds of inductions that we support.
enum InductionKind {
@@ -697,6 +927,16 @@ public:
return LAI;
+ /// \brief Check if \p Instr belongs to any interleaved access group.
+ bool isAccessInterleaved(Instruction *Instr) {
+ return InterleaveInfo.isInterleaved(Instr);
+ }
+ /// \brief Get the interleaved access group that \p Instr belongs to.
+ const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) {
+ return InterleaveInfo.getInterleaveGroup(Instr);
+ }
unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
bool hasStride(Value *V) { return StrideSet.count(V); }
@@ -792,6 +1032,10 @@ private:
// null until canVectorizeMemory sets it up.
const LoopAccessInfo *LAI;
+ /// The interleave access information contains groups of interleaved accesses
+ /// with the same stride and close to each other.
+ InterleavedAccessInfo InterleaveInfo;
// --- vectorization state --- //
/// Holds the integer induction variable. This is the counter of the
@@ -1657,6 +1901,251 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
+// Get a mask to interleave \p NumVec vectors into a wide vector.
+// I.e. <0, VF, VF*2, ..., VF*(NumVec-1), 1, VF+1, VF*2+1, ...>
+// E.g. For 2 interleaved vectors, if VF is 4, the mask is:
+// <0, 4, 1, 5, 2, 6, 3, 7>
+static Constant *getInterleavedMask(IRBuilder<> &Builder, unsigned VF,
+ unsigned NumVec) {
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < VF; i++)
+ for (unsigned j = 0; j < NumVec; j++)
+ Mask.push_back(Builder.getInt32(j * VF + i));
+ return ConstantVector::get(Mask);
+// Get the strided mask starting from index \p Start.
+// I.e. <Start, Start + Stride, ..., Start + Stride*(VF-1)>
+static Constant *getStridedMask(IRBuilder<> &Builder, unsigned Start,
+ unsigned Stride, unsigned VF) {
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < VF; i++)
+ Mask.push_back(Builder.getInt32(Start + i * Stride));
+ return ConstantVector::get(Mask);
+// Get a mask of two parts: The first part consists of sequential integers
+// starting from 0; the second part consists of UNDEFs.
+// I.e. <0, 1, 2, ..., NumInt - 1, undef, ..., undef>
+static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned NumInt,
+ unsigned NumUndef) {
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < NumInt; i++)
+ Mask.push_back(Builder.getInt32(i));
+ Constant *Undef = UndefValue::get(Builder.getInt32Ty());
+ for (unsigned i = 0; i < NumUndef; i++)
+ Mask.push_back(Undef);
+ return ConstantVector::get(Mask);
+// Concatenate two vectors with the same element type. The 2nd vector should
+// not have more elements than the 1st vector. If the 2nd vector has fewer
+// elements, extend it with UNDEFs.
+static Value *ConcatenateTwoVectors(IRBuilder<> &Builder, Value *V1,
+ Value *V2) {
+ VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
+ VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
+ assert(VecTy1 && VecTy2 &&
+ VecTy1->getScalarType() == VecTy2->getScalarType() &&
+ "Expect two vectors with the same element type");
+ unsigned NumElts1 = VecTy1->getNumElements();
+ unsigned NumElts2 = VecTy2->getNumElements();
+ assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
+ if (NumElts1 > NumElts2) {
+ // Extend with UNDEFs.
+ Constant *ExtMask =
+ getSequentialMask(Builder, NumElts2, NumElts1 - NumElts2);
+ V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask);
+ }
+ Constant *Mask = getSequentialMask(Builder, NumElts1 + NumElts2, 0);
+ return Builder.CreateShuffleVector(V1, V2, Mask);
+// Concatenate vectors in the given list. All vectors have the same type.
+static Value *ConcatenateVectors(IRBuilder<> &Builder,
+ ArrayRef<Value *> InputList) {
+ unsigned NumVec = InputList.size();
+ assert(NumVec > 1 && "Should be at least two vectors");
+ SmallVector<Value *, 8> ResList;
+ ResList.append(InputList.begin(), InputList.end());
+ do {
+ SmallVector<Value *, 8> TmpList;
+ for (unsigned i = 0; i < NumVec - 1; i += 2) {
+ Value *V0 = ResList[i], *V1 = ResList[i + 1];
+ assert((V0->getType() == V1->getType() || i == NumVec - 2) &&
+ "Only the last vector may have a different type");
+ TmpList.push_back(ConcatenateTwoVectors(Builder, V0, V1));
+ }
+ // Push the last vector if the total number of vectors is odd.
+ if (NumVec % 2 != 0)
+ TmpList.push_back(ResList[NumVec - 1]);
+ ResList = TmpList;
+ NumVec = ResList.size();
+ } while (NumVec > 1);
+ return ResList[0];
+// Try to vectorize the interleave group that \p Instr belongs to.
+// E.g. Translate following interleaved load group (factor = 3):
+// for (i = 0; i < N; i+=3) {
+// R = Pic[i]; // Member of index 0
+// G = Pic[i+1]; // Member of index 1
+// B = Pic[i+2]; // Member of index 2
+// ... // do something to R, G, B
+// }
+// To:
+// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
+// %R.vec = shuffle %wide.vec, undef, <0, 3, 6, 9> ; R elements
+// %G.vec = shuffle %wide.vec, undef, <1, 4, 7, 10> ; G elements
+// %B.vec = shuffle %wide.vec, undef, <2, 5, 8, 11> ; B elements
+// Or translate following interleaved store group (factor = 3):
+// for (i = 0; i < N; i+=3) {
+// ... do something to R, G, B
+// Pic[i] = R; // Member of index 0
+// Pic[i+1] = G; // Member of index 1
+// Pic[i+2] = B; // Member of index 2
+// }
+// To:
+// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
+// %B_U.vec = shuffle %B.vec, undef, <0, 1, 2, 3, u, u, u, u>
+// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
+// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
+// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
+void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
+ const InterleaveGroup *Group = Legal->getInterleavedAccessGroup(Instr);
+ assert(Group && "Fail to get an interleaved access group.");
+ // Skip if current instruction is not the insert position.
+ if (Instr != Group->getInsertPos())
+ return;
+ LoadInst *LI = dyn_cast<LoadInst>(Instr);
+ StoreInst *SI = dyn_cast<StoreInst>(Instr);
+ Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
+ // Prepare for the vector type of the interleaved load/store.
+ Type *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
+ unsigned InterleaveFactor = Group->getFactor();
+ Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
+ Type *PtrTy = VecTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
+ // Prepare for the new pointers.
+ setDebugLocFromInst(Builder, Ptr);
+ VectorParts &PtrParts = getVectorValue(Ptr);
+ SmallVector<Value *, 2> NewPtrs;
+ unsigned Index = Group->getIndex(Instr);
+ for (unsigned Part = 0; Part < UF; Part++) {
+ // Extract the pointer for current instruction from the pointer vector. A
+ // reverse access uses the pointer in the last lane.
+ Value *NewPtr = Builder.CreateExtractElement(
+ PtrParts[Part],
+ Group->isReverse() ? Builder.getInt32(VF - 1) : Builder.getInt32(0));
+ // Notice current instruction could be any index. Need to adjust the address
+ // to the member of index 0.
+ //
+ // E.g. a = A[i+1]; // Member of index 1 (Current instruction)
+ // b = A[i]; // Member of index 0
+ // Current pointer is pointed to A[i+1], adjust it to A[i].
+ //
+ // E.g. A[i+1] = a; // Member of index 1
+ // A[i] = b; // Member of index 0
+ // A[i+2] = c; // Member of index 2 (Current instruction)
+ // Current pointer is pointed to A[i+2], adjust it to A[i].
+ NewPtr = Builder.CreateGEP(NewPtr, Builder.getInt32(-Index));
+ // Cast to the vector pointer type.
+ NewPtrs.push_back(Builder.CreateBitCast(NewPtr, PtrTy));
+ }
+ setDebugLocFromInst(Builder, Instr);
+ Value *UndefVec = UndefValue::get(VecTy);
+ // Vectorize the interleaved load group.
+ if (LI) {
+ for (unsigned Part = 0; Part < UF; Part++) {
+ Instruction *NewLoadInstr = Builder.CreateAlignedLoad(
+ NewPtrs[Part], Group->getAlignment(), "wide.vec");
+ for (unsigned i = 0; i < InterleaveFactor; i++) {
+ Instruction *Member = Group->getMember(i);
+ // Skip the gaps in the group.
+ if (!Member)
+ continue;
+ Constant *StrideMask = getStridedMask(Builder, i, InterleaveFactor, VF);
+ Value *StridedVec = Builder.CreateShuffleVector(
+ NewLoadInstr, UndefVec, StrideMask, "strided.vec");
+ // If this member has a different type, cast the result type.
+ if (Member->getType() != ScalarTy) {
+ VectorType *OtherVTy = VectorType::get(Member->getType(), VF);
+ StridedVec = Builder.CreateBitOrPointerCast(StridedVec, OtherVTy);
+ }
+ VectorParts &Entry = WidenMap.get(Member);
+ Entry[Part] =
+ Group->isReverse() ? reverseVector(StridedVec) : StridedVec;
+ }
+ propagateMetadata(NewLoadInstr, Instr);
+ }
+ return;
+ }
+ // The sub vector type for current instruction.
+ VectorType *SubVT = VectorType::get(ScalarTy, VF);
+ // Vectorize the interleaved store group.
+ for (unsigned Part = 0; Part < UF; Part++) {
+ // Collect the stored vector from each member.
+ SmallVector<Value *, 4> StoredVecs;
+ for (unsigned i = 0; i < InterleaveFactor; i++) {
+ // Interleaved store group doesn't allow a gap, so each index has a member
+ Instruction *Member = Group->getMember(i);
+ assert(Member && "Fail to get a member from an interleaved store group");
+ Value *StoredVec =
+ getVectorValue(dyn_cast<StoreInst>(Member)->getValueOperand())[Part];
+ if (Group->isReverse())
+ StoredVec = reverseVector(StoredVec);
+ // If this member has a different type, cast it to a unified type.
+ if (StoredVec->getType() != SubVT)
+ StoredVec = Builder.CreateBitOrPointerCast(StoredVec, SubVT);
+ StoredVecs.push_back(StoredVec);
+ }
+ // Concatenate all vectors into a wide vector.
+ Value *WideVec = ConcatenateVectors(Builder, StoredVecs);
+ // Interleave the elements in the wide vector.
+ Constant *IMask = getInterleavedMask(Builder, VF, InterleaveFactor);
+ Value *IVec = Builder.CreateShuffleVector(WideVec, UndefVec, IMask,
+ "interleaved.vec");
+ Instruction *NewStoreInstr =
+ Builder.CreateAlignedStore(IVec, NewPtrs[Part], Group->getAlignment());
+ propagateMetadata(NewStoreInstr, Instr);
+ }
void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// Attempt to issue a wide load.
LoadInst *LI = dyn_cast<LoadInst>(Instr);
@@ -1664,6 +2153,10 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
assert((LI || SI) && "Invalid Load/Store instruction");
+ // Try to vectorize the interleave group if this access is interleaved.
+ if (Legal->isAccessInterleaved(Instr))
+ return vectorizeInterleaveGroup(Instr);
Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType();
Type *DataTy = VectorType::get(ScalarDataTy, VF);
Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
@@ -3408,6 +3901,10 @@ bool LoopVectorizationLegality::canVectorize() {
+ // Analyze interleaved memory accesses.
+ if (EnableInterleavedMemAccesses)
+ InterleaveInfo.analyzeInterleaving(Strides);
// Okay! We can vectorize. At this point we don't have any other mem analysis
// which may limit our maximum vectorization factor, so just return true with
// no restrictions.
@@ -3923,6 +4420,166 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
return true;
+// Gathers the loads/stores of TheLoop whose pointer has a constant stride and
+// records a StrideDescriptor (stride, pointer SCEV, element size, alignment)
+// for each of them in StrideAccesses.
+//
+// Only accesses with 2 <= |stride| <= MaxInterleaveGroupFactor are recorded.
+// If any load/store lives in a block for which blockNeedsPredication() is
+// true, the function returns before adding anything to StrideAccesses.
+void InterleavedAccessInfo::collectConstStridedAccesses(
+ MapVector<Instruction *, StrideDescriptor> &StrideAccesses,
+ const ValueToValueMap &Strides) {
+ // Holds load/store instructions in program order.
+ SmallVector<Instruction *, 16> AccessList;
+ for (auto *BB : TheLoop->getBlocks()) {
+ bool IsPred = LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
+ for (auto &I : *BB) {
+ if (!isa<LoadInst>(&I) && !isa<StoreInst>(&I))
+ continue;
+ // FIXME: Currently we can't handle mixed accesses and predicated accesses
+ if (IsPred)
+ return;
+ AccessList.push_back(&I);
+ }
+ }
+ if (AccessList.empty())
+ return;
+ auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
+ for (auto I : AccessList) {
+ // AccessList only contains loads and stores, so exactly one of LI/SI is
+ // non-null here.
+ LoadInst *LI = dyn_cast<LoadInst>(I);
+ StoreInst *SI = dyn_cast<StoreInst>(I);
+ Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
+ int Stride = isStridedPtr(SE, Ptr, TheLoop, Strides);
+ // The factor of the corresponding interleave group.
+ unsigned Factor = std::abs(Stride);
+ // Ignore the access if the factor is too small or too large.
+ if (Factor < 2 || Factor > MaxInterleaveGroupFactor)
+ continue;
+ const SCEV *Scev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+ // Ptr is the pointer operand of a load/store, so the cast must succeed;
+ // cast<> asserts that instead of dereferencing an unchecked dyn_cast<>.
+ PointerType *PtrTy = cast<PointerType>(Ptr->getType());
+ unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType());
+ // An alignment of 0 means target ABI alignment.
+ unsigned Align = LI ? LI->getAlignment() : SI->getAlignment();
+ if (!Align)
+ Align = DL.getABITypeAlignment(PtrTy->getElementType());
+ StrideAccesses[I] = StrideDescriptor(Stride, Scev, Size, Align);
+ }
+}
+// Analyze interleaved accesses and collect them into interleave groups.
+//
+// Notice that the vectorization on interleaved groups will change instruction
+// orders and may break dependences. But the memory dependence check guarantees
+// that there is no overlap between two pointers of different strides, element
+// sizes or underlying bases.
+//
+// For pointers sharing the same stride, element size and underlying base, no
+// need to worry about Read-After-Write dependences and Write-After-Read
+// dependences.
+//
+// E.g. The RAW dependence: A[i] = a;
+// b = A[i];
+// This won't exist as it is a store-load forwarding conflict, which has
+// already been checked and forbidden in the dependence check.
+//
+// E.g. The WAR dependence: a = A[i]; // (1)
+// A[i] = b; // (2)
+// The store group of (2) is always inserted at or below (2), and the load group
+// of (1) is always inserted at or above (1). The dependence is safe.
+void InterleavedAccessInfo::analyzeInterleaving(
+ const ValueToValueMap &Strides) {
+ DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
+ // Holds all the stride accesses.
+ MapVector<Instruction *, StrideDescriptor> StrideAccesses;
+ collectConstStridedAccesses(StrideAccesses, Strides);
+ if (StrideAccesses.empty())
+ return;
+ // Holds all interleaved store groups temporarily.
+ SmallSetVector<InterleaveGroup *, 4> StoreGroups;
+ // Search the load-load/write-write pair B-A in bottom-up order and try to
+ // insert B into the interleave group of A according to 3 rules:
+ // 1. A and B have the same stride.
+ // 2. A and B have the same memory object size.
+ // 3. B belongs to the group according to the distance.
+ //
+ // The bottom-up order can avoid breaking the Write-After-Write dependences
+ // between two pointers of the same base.
+ // E.g. A[i] = a; (1)
+ // A[i] = b; (2)
+ // A[i+1] = c (3)
+ // We form the group (2)+(3) in front, so (1) has to form groups with accesses
+ // above (1), which guarantees that (1) is always above (2).
+ for (auto I = StrideAccesses.rbegin(), E = StrideAccesses.rend(); I != E;
+ ++I) {
+ Instruction *A = I->first;
+ StrideDescriptor DesA = I->second;
+ InterleaveGroup *Group = getInterleaveGroup(A);
+ if (!Group) {
+ DEBUG(dbgs() << "LV: Creating an interleave group with:" << *A << '\n');
+ Group = createInterleaveGroup(A, DesA.Stride, DesA.Align);
+ }
+ if (A->mayWriteToMemory())
+ StoreGroups.insert(Group);
+ // Walk the accesses that precede A in the map (reverse iteration).
+ for (auto II = std::next(I); II != E; ++II) {
+ Instruction *B = II->first;
+ StrideDescriptor DesB = II->second;
+ // Ignore if B is already in a group or B is a different memory operation.
+ if (isInterleaved(B) || A->mayReadFromMemory() != B->mayReadFromMemory())
+ continue;
+ // Check rules 1 and 2.
+ if (DesB.Stride != DesA.Stride || DesB.Size != DesA.Size)
+ continue;
+ // Calculate the distance and prepare for rule 3.
+ const SCEVConstant *DistToA =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(DesB.Scev, DesA.Scev));
+ if (!DistToA)
+ continue;
+ int DistanceToA = DistToA->getValue()->getValue().getSExtValue();
+ // Skip if the distance is not a multiple of the size, as they are not in
+ // the same group.
+ if (DistanceToA % static_cast<int>(DesA.Size))
+ continue;
+ // The index of B is the index of A plus the related index to A.
+ int IndexB =
+ Group->getIndex(A) + DistanceToA / static_cast<int>(DesA.Size);
+ // Try to insert B into the group.
+ if (Group->insertMember(B, IndexB, DesB.Align)) {
+ DEBUG(dbgs() << "LV: Inserted:" << *B << '\n'
+ << " into the interleave group with" << *A << '\n');
+ InterleaveGroupMap[B] = Group;
+ // Set the first load in program order as the insert position.
+ if (B->mayReadFromMemory())
+ Group->setInsertPos(B);
+ }
+ } // Iteration on instruction B
+ } // Iteration on instruction A
+ // Remove interleaved store groups with gaps.
+ for (InterleaveGroup *Group : StoreGroups)
+ if (Group->getNumMembers() != Group->getFactor())
+ releaseGroup(Group);
+}
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
// Width 1 means no vectorize
@@ -4575,6 +5232,46 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
return TTI.getAddressComputationCost(VectorTy) +
TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+ // For an interleaved access, calculate the total cost of the whole
+ // interleave group.
+ if (Legal->isAccessInterleaved(I)) {
+ auto Group = Legal->getInterleavedAccessGroup(I);
+ assert(Group && "Fail to get an interleaved access group.");
+ // Only calculate the cost once at the insert position.
+ if (Group->getInsertPos() != I)
+ return 0;
+ unsigned InterleaveFactor = Group->getFactor();
+ Type *WideVecTy =
+ VectorType::get(VectorTy->getVectorElementType(),
+ VectorTy->getVectorNumElements() * InterleaveFactor);
+ // Holds the indices of existing members in an interleaved load group.
+ // An interleaved store group doesn't need this as it doesn't allow gaps.
+ SmallVector<unsigned, 4> Indices;
+ if (LI) {
+ for (unsigned i = 0; i < InterleaveFactor; i++)
+ if (Group->getMember(i))
+ Indices.push_back(i);
+ }
+ // Calculate the cost of the whole interleaved group.
+ unsigned Cost = TTI.getInterleavedMemoryOpCost(
+ I->getOpcode(), WideVecTy, Group->getFactor(), Indices,
+ Group->getAlignment(), AS);
+ if (Group->isReverse())
+ Cost +=
+ Group->getNumMembers() *
+ TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
+ // FIXME: The interleaved load group with a huge gap could be even more
+ // expensive than scalar operations. Then we could ignore such group and
+ // use scalar operations instead.
+ return Cost;
+ }
// Scalarized loads/stores.
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
bool Reverse = ConsecutiveStride < 0;
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 504425e..a3a45c8 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -317,9 +317,9 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
/// \returns the AA location that is being access by the instruction.
static AliasAnalysis::Location getLocation(Instruction *I, AliasAnalysis *AA) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return AA->getLocation(SI);
+ return MemoryLocation::get(SI);
if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return AA->getLocation(LI);
+ return MemoryLocation::get(LI);
return AliasAnalysis::Location();
@@ -472,7 +472,7 @@ private:
/// Create a new VectorizableTree entry.
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized) {
- VectorizableTree.push_back(TreeEntry());
+ VectorizableTree.emplace_back();
int idx = VectorizableTree.size() - 1;
TreeEntry *Last = &VectorizableTree[idx];
Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
diff --git a/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll b/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
index 1b47341..6a5e42f 100644
--- a/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
+++ b/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
@@ -38,3 +38,73 @@ for.body:
ret void
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; unsigned i, j;
+; for (i = 1; i < SIZE; i++) {
+; for (j = i; j < SIZE; j++) {
+; a[i][j] = a[i+1][j-1] + 2;
+; }
+; }
+; Extends the previous example to coupled MIV subscripts.
+@a = global [10004 x [10004 x i32]] zeroinitializer, align 16
+; Function Attrs: nounwind uwtable
+define void @coupled_miv_type_mismatch(i32 %n) #0 {
+; CHECK-LABEL: 'Dependence Analysis' for function 'coupled_miv_type_mismatch'
+; DELIN-LABEL: 'Dependence Analysis' for function 'coupled_miv_type_mismatch'
+entry:
+ br label %for.cond
+; CHECK: da analyze - input [0 *]!
+; CHECK: da analyze - anti [1 *]!
+; CHECK: da analyze - none!
+; DELIN: da analyze - input [0 *]!
+; DELIN: da analyze - anti [1 *]!
+; DELIN: da analyze - none!
+for.cond: ; preds = %for.inc11, %entry
+ %indvars.iv11 = phi i64 [ %indvars.iv.next12, %for.inc11 ], [ 1, %entry ]
+ %exitcond14 = icmp ne i64 %indvars.iv11, 10000
+ br i1 %exitcond14, label %for.cond1.preheader, label %for.end13
+for.cond1.preheader: ; preds = %for.cond
+ %0 = trunc i64 %indvars.iv11 to i32
+ br label %for.cond1
+for.cond1: ; preds = %for.cond1.preheader, %for.body3
+ %indvars.iv8 = phi i64 [ %indvars.iv11, %for.cond1.preheader ], [ %indvars.iv.next9, %for.body3 ]
+ %j.0 = phi i32 [ %inc, %for.body3 ], [ %0, %for.cond1.preheader ]
+ %lftr.wideiv = trunc i64 %indvars.iv8 to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 10000
+ br i1 %exitcond, label %for.body3, label %for.inc11
+for.body3: ; preds = %for.cond1
+ %sub = add nsw i32 %j.0, -1
+ %idxprom = zext i32 %sub to i64
+ %1 = add nuw nsw i64 %indvars.iv11, 1
+ %arrayidx5 = getelementptr inbounds [10004 x [10004 x i32]], [10004 x [10004 x i32]]* @a, i64 0, i64 %1, i64 %idxprom
+ %2 = load i32, i32* %arrayidx5, align 4
+ %add6 = add nsw i32 %2, 2
+ %arrayidx10 = getelementptr inbounds [10004 x [10004 x i32]], [10004 x [10004 x i32]]* @a, i64 0, i64 %indvars.iv11, i64 %indvars.iv8
+ store i32 %add6, i32* %arrayidx10, align 4
+ %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1
+ %inc = add nuw nsw i32 %j.0, 1
+ br label %for.cond1
+for.inc11: ; preds = %for.cond1
+ %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1
+ br label %for.cond
+for.end13: ; preds = %for.cond
+ ret void
+}
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+!llvm.ident = !{!0}
+!0 = !{!"clang version 3.7.0 ( 93a05fb75ee3411d24e8b2b184fc766a5318403e) ( 166d93d26efc912b517739f64d054a435e8e95cd)"}
diff --git a/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll b/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
new file mode 100644
index 0000000..f9871c6
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
@@ -0,0 +1,58 @@
+; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
+; 3 reads and 3 writes should need 12 memchecks
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnueabi"
+; CHECK: Memory dependences are safe with run-time checks
+; Memory dependencies have labels starting from 0, so in
+; order to verify that we have n checks, we look for
+; (n-1): and not n:.
+; CHECK: Run-time memory checks:
+; CHECK: 11:
+; CHECK-NOT: 12:
+define void @testf(i16* %a,
+ i16* %b,
+ i16* %c,
+ i16* %d,
+ i16* %e,
+ i16* %f) {
+entry:
+ br label %for.body
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+ %add = add nuw nsw i64 %ind, 1
+ %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
+ %loadA = load i16, i16* %arrayidxA, align 2
+ %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
+ %loadB = load i16, i16* %arrayidxB, align 2
+ %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %ind
+ %loadC = load i16, i16* %arrayidxC, align 2
+ %mul = mul i16 %loadB, %loadA
+ %mul1 = mul i16 %mul, %loadC
+ %arrayidxD = getelementptr inbounds i16, i16* %d, i64 %ind
+ store i16 %mul1, i16* %arrayidxD, align 2
+ %arrayidxE = getelementptr inbounds i16, i16* %e, i64 %ind
+ store i16 %mul, i16* %arrayidxE, align 2
+ %arrayidxF = getelementptr inbounds i16, i16* %f, i64 %ind
+ store i16 %mul1, i16* %arrayidxF, align 2
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll b/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll
new file mode 100644
index 0000000..7357356
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll
@@ -0,0 +1,540 @@
+; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+; Following cases are no dependence.
+; void nodep_Read_Write(int *A) {
+; int *B = A + 1;
+; for (unsigned i = 0; i < 1024; i+=3)
+; B[i] = A[i] + 1;
+; }
+; CHECK: function 'nodep_Read_Write':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Run-time memory checks:
+define void @nodep_Read_Write(i32* nocapture %A) {
+entry:
+ %add.ptr = getelementptr inbounds i32, i32* %A, i64 1
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3 ; i += 3
+ %cmp = icmp ult i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+; int nodep_Write_Read(int *A) {
+; int sum = 0;
+; for (unsigned i = 0; i < 1024; i+=4) {
+; A[i] = i;
+; sum += A[i+3];
+; }
+; return sum;
+; }
+; CHECK: function 'nodep_Write_Read':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Run-time memory checks:
+define i32 @nodep_Write_Read(i32* nocapture %A) {
+entry:
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret i32 %add3
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %sum.013 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %0 = trunc i64 %indvars.iv to i32
+ store i32 %0, i32* %arrayidx, align 4
+ %1 = or i64 %indvars.iv, 3
+ %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %1
+ %2 = load i32, i32* %arrayidx2, align 4
+ %add3 = add nsw i32 %2, %sum.013
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4 ; i += 4
+ %cmp = icmp ult i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+; void nodep_Write_Write(int *A) {
+; for (unsigned i = 0; i < 1024; i+=2) {
+; A[i] = i;
+; A[i+1] = i+1;
+; }
+; }
+; CHECK: function 'nodep_Write_Write':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Run-time memory checks:
+define void @nodep_Write_Write(i32* nocapture %A) {
+entry:
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %0 = trunc i64 %indvars.iv to i32
+ store i32 %0, i32* %arrayidx, align 4
+ %1 = or i64 %indvars.iv, 1
+ %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %1
+ %2 = trunc i64 %1 to i32
+ store i32 %2, i32* %arrayidx3, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 ; i += 2
+ %cmp = icmp ult i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+; The following cases have unsafe dependences and are not vectorizable.
+; void unsafe_Read_Write(int *A) {
+; for (unsigned i = 0; i < 1024; i+=3)
+; A[i+3] = A[i] + 1;
+; }
+; CHECK: function 'unsafe_Read_Write':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Backward:
+; CHECK-NEXT: %0 = load i32, i32* %arrayidx, align 4 ->
+; CHECK-NEXT: store i32 %add, i32* %arrayidx3, align 4
+define void @unsafe_Read_Write(i32* nocapture %A) {
+entry:
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %entry, %for.body
+ %i.010 = phi i32 [ 0, %entry ], [ %add1, %for.body ]
+ %idxprom = zext i32 %i.010 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, 1
+ %add1 = add i32 %i.010, 3 ; i + 3: both the store index and the next i
+ %idxprom2 = zext i32 %add1 to i64
+ %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %idxprom2
+ store i32 %add, i32* %arrayidx3, align 4
+ %cmp = icmp ult i32 %add1, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+; int unsafe_Write_Read(int *A) {
+; int sum = 0;
+; for (unsigned i = 0; i < 1024; i+=4) {
+; A[i] = i;
+; sum += A[i+4];
+; }
+; return sum;
+; }
+; CHECK: function 'unsafe_Write_Read':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Backward:
+; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
+; CHECK-NEXT: %1 = load i32, i32* %arrayidx2, align 4
+define i32 @unsafe_Write_Read(i32* nocapture %A) {
+entry:
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret i32 %add3
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %sum.013 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %0 = trunc i64 %indvars.iv to i32
+ store i32 %0, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4 ; i + 4: both the load index and the next i
+ %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %1 = load i32, i32* %arrayidx2, align 4
+ %add3 = add nsw i32 %1, %sum.013
+ %cmp = icmp ult i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+; void unsafe_Write_Write(int *A) {
+; for (unsigned i = 0; i < 1024; i+=2) {
+; A[i] = i;
+; A[i+2] = i+1;
+; }
+; }
+; CHECK: function 'unsafe_Write_Write':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Backward:
+; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
+; CHECK-NEXT: store i32 %2, i32* %arrayidx3, align 4
+define void @unsafe_Write_Write(i32* nocapture %A) {
+entry:
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %0 = trunc i64 %indvars.iv to i32
+ store i32 %0, i32* %arrayidx, align 4
+ %1 = or i64 %indvars.iv, 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 ; i + 2: both the second store index and the next i
+ %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %2 = trunc i64 %1 to i32
+ store i32 %2, i32* %arrayidx3, align 4
+ %cmp = icmp ult i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+; Following cases check that strided accesses can be vectorized.
+; void vectorizable_Read_Write(int *A) {
+; int *B = A + 4;
+; for (unsigned i = 0; i < 1024; i+=2)
+; B[i] = A[i] + 1;
+; }
+; CHECK: function 'vectorizable_Read_Write':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: BackwardVectorizable:
+; CHECK-NEXT: %0 = load i32, i32* %arrayidx, align 4 ->
+; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
+define void @vectorizable_Read_Write(i32* nocapture %A) {
+entry:
+ %add.ptr = getelementptr inbounds i32, i32* %A, i64 4
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 ; i += 2
+ %cmp = icmp ult i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+; int vectorizable_Write_Read(int *A) {
+; int *B = A + 4;
+; int sum = 0;
+; for (unsigned i = 0; i < 1024; i+=2) {
+; A[i] = i;
+; sum += B[i];
+; }
+; return sum;
+; }
+; CHECK: function 'vectorizable_Write_Read':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: BackwardVectorizable:
+; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
+; CHECK-NEXT: %1 = load i32, i32* %arrayidx2, align 4
+define i32 @vectorizable_Write_Read(i32* nocapture %A) {
+entry:
+ %add.ptr = getelementptr inbounds i32, i32* %A, i64 4
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret i32 %add
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %sum.013 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %0 = trunc i64 %indvars.iv to i32
+ store i32 %0, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx2, align 4
+ %add = add nsw i32 %1, %sum.013
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 ; i += 2
+ %cmp = icmp ult i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+; void vectorizable_Write_Write(int *A) {
+; int *B = A + 4;
+; for (unsigned i = 0; i < 1024; i+=2) {
+; A[i] = i;
+; B[i] = i+1;
+; }
+; }
+; CHECK: function 'vectorizable_Write_Write':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: BackwardVectorizable:
+; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
+; CHECK-NEXT: store i32 %2, i32* %arrayidx2, align 4
+define void @vectorizable_Write_Write(i32* nocapture %A) {
+entry:
+ %add.ptr = getelementptr inbounds i32, i32* %A, i64 4
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %0 = trunc i64 %indvars.iv to i32
+ store i32 %0, i32* %arrayidx, align 4
+ %1 = or i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
+ %2 = trunc i64 %1 to i32
+ store i32 %2, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 ; i += 2
+ %cmp = icmp ult i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+; void vectorizable_unscaled_Read_Write(int *A) {
+; int *B = (int *)((char *)A + 14);
+; for (unsigned i = 0; i < 1024; i+=2)
+; B[i] = A[i] + 1;
+; }
+; FIXME: This case looks like previous case @vectorizable_Read_Write. It should
+; be vectorizable.
+; CHECK: function 'vectorizable_unscaled_Read_Write':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
+; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 ->
+; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
+define void @vectorizable_unscaled_Read_Write(i32* nocapture %A) {
+entry:
+ %0 = bitcast i32* %A to i8*
+ %add.ptr = getelementptr inbounds i8, i8* %0, i64 14
+ %1 = bitcast i8* %add.ptr to i32*
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %2 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %2, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 ; i += 2
+ %cmp = icmp ult i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+; int vectorizable_unscaled_Write_Read(int *A) {
+; int *B = (int *)((char *)A + 17);
+; int sum = 0;
+; for (unsigned i = 0; i < 1024; i+=2) {
+; A[i] = i;
+; sum += B[i];
+; }
+; return sum;
+; }
+; CHECK: for function 'vectorizable_unscaled_Write_Read':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: BackwardVectorizable:
+; CHECK-NEXT: store i32 %2, i32* %arrayidx, align 4 ->
+; CHECK-NEXT: %3 = load i32, i32* %arrayidx2, align 4
+define i32 @vectorizable_unscaled_Write_Read(i32* nocapture %A) {
+entry:
+ %0 = bitcast i32* %A to i8*
+ %add.ptr = getelementptr inbounds i8, i8* %0, i64 17
+ %1 = bitcast i8* %add.ptr to i32*
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret i32 %add
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %sum.013 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %2 = trunc i64 %indvars.iv to i32
+ store i32 %2, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
+ %3 = load i32, i32* %arrayidx2, align 4
+ %add = add nsw i32 %3, %sum.013
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 ; i += 2
+ %cmp = icmp ult i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+; void unsafe_unscaled_Read_Write(int *A) {
+; int *B = (int *)((char *)A + 11);
+; for (unsigned i = 0; i < 1024; i+=2)
+; B[i] = A[i] + 1;
+; }
+; CHECK: function 'unsafe_unscaled_Read_Write':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Backward:
+; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 ->
+; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
+define void @unsafe_unscaled_Read_Write(i32* nocapture %A) {
+entry:
+ %0 = bitcast i32* %A to i8*
+ %add.ptr = getelementptr inbounds i8, i8* %0, i64 11
+ %1 = bitcast i8* %add.ptr to i32*
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %2 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %2, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 ; i += 2
+ %cmp = icmp ult i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+; CHECK: function 'unsafe_unscaled_Read_Write2':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Backward:
+; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 ->
+; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
+; void unsafe_unscaled_Read_Write2(int *A) {
+; int *B = (int *)((char *)A + 1);
+; for (unsigned i = 0; i < 1024; i+=2)
+; B[i] = A[i] + 1;
+; }
+define void @unsafe_unscaled_Read_Write2(i32* nocapture %A) {
+entry:
+ %0 = bitcast i32* %A to i8*
+ %add.ptr = getelementptr inbounds i8, i8* %0, i64 1
+ %1 = bitcast i8* %add.ptr to i32*
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %2 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %2, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 ; i += 2
+ %cmp = icmp ult i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+; Following case checks that interleaved stores have dependences with another
+; store and can not pass dependence check.
+; void interleaved_stores(int *A) {
+; int *B = (int *) ((char *)A + 1);
+; for(int i = 0; i < 1024; i+=2) {
+; B[i] = i; // (1)
+; A[i+1] = i + 1; // (2)
+; B[i+1] = i + 1; // (3)
+; }
+; }
+; The access (2) has overlaps with (1) and (3).
+; CHECK: function 'interleaved_stores':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop
+; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Backward:
+; CHECK-NEXT: store i32 %4, i32* %arrayidx5, align 4 ->
+; CHECK-NEXT: store i32 %4, i32* %arrayidx9, align 4
+; CHECK: Backward:
+; CHECK-NEXT: store i32 %2, i32* %arrayidx2, align 4 ->
+; CHECK-NEXT: store i32 %4, i32* %arrayidx5, align 4
+define void @interleaved_stores(i32* nocapture %A) {
+entry:
+ %0 = bitcast i32* %A to i8*
+ %incdec.ptr = getelementptr inbounds i8, i8* %0, i64 1
+ %1 = bitcast i8* %incdec.ptr to i32*
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %2 = trunc i64 %indvars.iv to i32
+ %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
+ store i32 %2, i32* %arrayidx2, align 4
+ %3 = or i64 %indvars.iv, 1
+ %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %3
+ %4 = trunc i64 %3 to i32
+ store i32 %4, i32* %arrayidx5, align 4
+ %arrayidx9 = getelementptr inbounds i32, i32* %1, i64 %3
+ store i32 %4, i32* %arrayidx9, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 ; i += 2
+ %cmp = icmp slt i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
diff --git a/test/Analysis/ValueTracking/memory-dereferenceable.ll b/test/Analysis/ValueTracking/memory-dereferenceable.ll
index dae64d7..f49f4f7 100644
--- a/test/Analysis/ValueTracking/memory-dereferenceable.ll
+++ b/test/Analysis/ValueTracking/memory-dereferenceable.ll
@@ -10,6 +10,9 @@ declare zeroext i1 @return_i1()
@globalstr = global [6 x i8] c"hello\00"
@globali32ptr = external global i32*
+%struct.A = type { [8 x i8], [5 x i8] }
+@globalstruct = external global %struct.A
define void @test(i32 addrspace(1)* dereferenceable(8) %dparam) gc "statepoint-example" {
; CHECK: The following are dereferenceable:
; CHECK: %globalptr
@@ -22,6 +25,8 @@ define void @test(i32 addrspace(1)* dereferenceable(8) %dparam) gc "statepoint-e
; CHECK-NOT: %d2_load
; CHECK-NOT: %d_or_null_load
; CHECK: %d_or_null_non_null_load
+; CHECK: %within_allocation
+; CHECK-NOT: %outside_allocation
%globalptr = getelementptr inbounds [6 x i8], [6 x i8]* @globalstr, i32 0, i32 0
%load1 = load i8, i8* %globalptr
@@ -54,6 +59,14 @@ entry:
%d_or_null_non_null_load = load i32*, i32** @globali32ptr, !nonnull !2, !dereferenceable_or_null !0
%load10 = load i32, i32* %d_or_null_non_null_load
+ ; It's OK to overrun static array size as long as we stay within underlying object size
+ %within_allocation = getelementptr inbounds %struct.A, %struct.A* @globalstruct, i64 0, i32 0, i64 10
+ %load11 = load i8, i8* %within_allocation
+ ; GEP is outside the underlying object size
+ %outside_allocation = getelementptr inbounds %struct.A, %struct.A* @globalstruct, i64 0, i32 1, i64 10
+ %load12 = load i8, i8* %outside_allocation
ret void
diff --git a/test/Assembler/mdcompileunit.ll b/test/Assembler/dicompileunit.ll
index dc136f0..dc136f0 100644
--- a/test/Assembler/mdcompileunit.ll
+++ b/test/Assembler/dicompileunit.ll
diff --git a/test/Assembler/mdexpression.ll b/test/Assembler/diexpression.ll
index 31be86c..31be86c 100644
--- a/test/Assembler/mdexpression.ll
+++ b/test/Assembler/diexpression.ll
diff --git a/test/Assembler/mdfile-escaped-chars.ll b/test/Assembler/difile-escaped-chars.ll
index 5900fdc..5900fdc 100644
--- a/test/Assembler/mdfile-escaped-chars.ll
+++ b/test/Assembler/difile-escaped-chars.ll
diff --git a/test/Assembler/mdglobalvariable.ll b/test/Assembler/diglobalvariable.ll
index 0d027d3..0d027d3 100644
--- a/test/Assembler/mdglobalvariable.ll
+++ b/test/Assembler/diglobalvariable.ll
diff --git a/test/Assembler/mdimportedentity.ll b/test/Assembler/diimportedentity.ll
index 929267e..929267e 100644
--- a/test/Assembler/mdimportedentity.ll
+++ b/test/Assembler/diimportedentity.ll
diff --git a/test/Assembler/mdlexicalblock.ll b/test/Assembler/dilexicalblock.ll
index 2cab372..2cab372 100644
--- a/test/Assembler/mdlexicalblock.ll
+++ b/test/Assembler/dilexicalblock.ll
diff --git a/test/Assembler/dilocalvariable-arg-large.ll b/test/Assembler/dilocalvariable-arg-large.ll
new file mode 100644
index 0000000..7788186
--- /dev/null
+++ b/test/Assembler/dilocalvariable-arg-large.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
+; CHECK: !named = !{!0, !1}
+!named = !{!0, !1}
+!0 = distinct !DISubprogram()
+; CHECK: !1 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "foo", arg: 65535, scope: !0)
+!1 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "foo", arg: 65535, scope: !0)
diff --git a/test/Assembler/mdlocalvariable.ll b/test/Assembler/dilocalvariable.ll
index 312373c..312373c 100644
--- a/test/Assembler/mdlocalvariable.ll
+++ b/test/Assembler/dilocalvariable.ll
diff --git a/test/Assembler/mdlocation.ll b/test/Assembler/dilocation.ll
index a468f8a..a468f8a 100644
--- a/test/Assembler/mdlocation.ll
+++ b/test/Assembler/dilocation.ll
diff --git a/test/Assembler/mdnamespace.ll b/test/Assembler/dinamespace.ll
index 5d8b6b3..5d8b6b3 100644
--- a/test/Assembler/mdnamespace.ll
+++ b/test/Assembler/dinamespace.ll
diff --git a/test/Assembler/mdobjcproperty.ll b/test/Assembler/diobjcproperty.ll
index ca13e27..ca13e27 100644
--- a/test/Assembler/mdobjcproperty.ll
+++ b/test/Assembler/diobjcproperty.ll
diff --git a/test/Assembler/mdsubprogram.ll b/test/Assembler/disubprogram.ll
index 3fa1081..3fa1081 100644
--- a/test/Assembler/mdsubprogram.ll
+++ b/test/Assembler/disubprogram.ll
diff --git a/test/Assembler/mdsubrange-empty-array.ll b/test/Assembler/disubrange-empty-array.ll
index 7b5279e..7b5279e 100644
--- a/test/Assembler/mdsubrange-empty-array.ll
+++ b/test/Assembler/disubrange-empty-array.ll
diff --git a/test/Assembler/mdsubroutinetype.ll b/test/Assembler/disubroutinetype.ll
index 4ec2be7..4ec2be7 100644
--- a/test/Assembler/mdsubroutinetype.ll
+++ b/test/Assembler/disubroutinetype.ll
diff --git a/test/Assembler/mdtemplateparameter.ll b/test/Assembler/ditemplateparameter.ll
index a356ad4..a356ad4 100644
--- a/test/Assembler/mdtemplateparameter.ll
+++ b/test/Assembler/ditemplateparameter.ll
diff --git a/test/Assembler/mdtype-large-values.ll b/test/Assembler/ditype-large-values.ll
index a371ac6..a371ac6 100644
--- a/test/Assembler/mdtype-large-values.ll
+++ b/test/Assembler/ditype-large-values.ll
diff --git a/test/Assembler/getelementptr.ll b/test/Assembler/getelementptr.ll
index ee88346..0a0fddf 100644
--- a/test/Assembler/getelementptr.ll
+++ b/test/Assembler/getelementptr.ll
@@ -19,6 +19,10 @@
@y = global i32* getelementptr ({ i32, i32 }, { i32, i32 }* @x, i16 42, i32 0)
; CHECK: @y = global i32* getelementptr ({ i32, i32 }, { i32, i32 }* @x, i16 42, i32 0)
+@PR23753_a = external global i8
+@PR23753_b = global i8* getelementptr (i8, i8* @PR23753_a, i64 ptrtoint (i8* @PR23753_a to i64))
+; CHECK: @PR23753_b = global i8* getelementptr (i8, i8* @PR23753_a, i64 ptrtoint (i8* @PR23753_a to i64))
; See if i92 indices work too.
define i32 *@test({i32, i32}* %t, i92 %n) {
; CHECK: @test
diff --git a/test/Assembler/invalid-mdcompileunit-language-bad.ll b/test/Assembler/invalid-dicompileunit-language-bad.ll
index e6f49f3..e6f49f3 100644
--- a/test/Assembler/invalid-mdcompileunit-language-bad.ll
+++ b/test/Assembler/invalid-dicompileunit-language-bad.ll
diff --git a/test/Assembler/invalid-mdcompileunit-language-overflow.ll b/test/Assembler/invalid-dicompileunit-language-overflow.ll
index c558f7a..c558f7a 100644
--- a/test/Assembler/invalid-mdcompileunit-language-overflow.ll
+++ b/test/Assembler/invalid-dicompileunit-language-overflow.ll
diff --git a/test/Assembler/invalid-mdcompileunit-missing-language.ll b/test/Assembler/invalid-dicompileunit-missing-language.ll
index 15631b7..15631b7 100644
--- a/test/Assembler/invalid-mdcompileunit-missing-language.ll
+++ b/test/Assembler/invalid-dicompileunit-missing-language.ll
diff --git a/test/Assembler/invalid-mdcompileunit-null-file.ll b/test/Assembler/invalid-dicompileunit-null-file.ll
index cc1892e..cc1892e 100644
--- a/test/Assembler/invalid-mdcompileunit-null-file.ll
+++ b/test/Assembler/invalid-dicompileunit-null-file.ll
diff --git a/test/Assembler/invalid-mdcompositetype-missing-tag.ll b/test/Assembler/invalid-dicompositetype-missing-tag.ll
index e68c014..e68c014 100644
--- a/test/Assembler/invalid-mdcompositetype-missing-tag.ll
+++ b/test/Assembler/invalid-dicompositetype-missing-tag.ll
diff --git a/test/Assembler/invalid-mdderivedtype-missing-basetype.ll b/test/Assembler/invalid-diderivedtype-missing-basetype.ll
index 308c2ea..308c2ea 100644
--- a/test/Assembler/invalid-mdderivedtype-missing-basetype.ll
+++ b/test/Assembler/invalid-diderivedtype-missing-basetype.ll
diff --git a/test/Assembler/invalid-mdderivedtype-missing-tag.ll b/test/Assembler/invalid-diderivedtype-missing-tag.ll
index fd286f4..fd286f4 100644
--- a/test/Assembler/invalid-mdderivedtype-missing-tag.ll
+++ b/test/Assembler/invalid-diderivedtype-missing-tag.ll
diff --git a/test/Assembler/invalid-mdenumerator-missing-name.ll b/test/Assembler/invalid-dienumerator-missing-name.ll
index 656d343..656d343 100644
--- a/test/Assembler/invalid-mdenumerator-missing-name.ll
+++ b/test/Assembler/invalid-dienumerator-missing-name.ll
diff --git a/test/Assembler/invalid-mdenumerator-missing-value.ll b/test/Assembler/invalid-dienumerator-missing-value.ll
index 0eee3be..0eee3be 100644
--- a/test/Assembler/invalid-mdenumerator-missing-value.ll
+++ b/test/Assembler/invalid-dienumerator-missing-value.ll
diff --git a/test/Assembler/invalid-mdexpression-large.ll b/test/Assembler/invalid-diexpression-large.ll
index 32b77ee..32b77ee 100644
--- a/test/Assembler/invalid-mdexpression-large.ll
+++ b/test/Assembler/invalid-diexpression-large.ll
diff --git a/test/Assembler/invalid-mdexpression-verify.ll b/test/Assembler/invalid-diexpression-verify.ll
index 50d6943..50d6943 100644
--- a/test/Assembler/invalid-mdexpression-verify.ll
+++ b/test/Assembler/invalid-diexpression-verify.ll
diff --git a/test/Assembler/invalid-mdfile-missing-directory.ll b/test/Assembler/invalid-difile-missing-directory.ll
index a54a22f..a54a22f 100644
--- a/test/Assembler/invalid-mdfile-missing-directory.ll
+++ b/test/Assembler/invalid-difile-missing-directory.ll
diff --git a/test/Assembler/invalid-mdfile-missing-filename.ll b/test/Assembler/invalid-difile-missing-filename.ll
index 17bf3da..17bf3da 100644
--- a/test/Assembler/invalid-mdfile-missing-filename.ll
+++ b/test/Assembler/invalid-difile-missing-filename.ll
diff --git a/test/Assembler/invalid-mdglobalvariable-empty-name.ll b/test/Assembler/invalid-diglobalvariable-empty-name.ll
index a4e69f3..a4e69f3 100644
--- a/test/Assembler/invalid-mdglobalvariable-empty-name.ll
+++ b/test/Assembler/invalid-diglobalvariable-empty-name.ll
diff --git a/test/Assembler/invalid-mdglobalvariable-missing-name.ll b/test/Assembler/invalid-diglobalvariable-missing-name.ll
index baf4d73..baf4d73 100644
--- a/test/Assembler/invalid-mdglobalvariable-missing-name.ll
+++ b/test/Assembler/invalid-diglobalvariable-missing-name.ll
diff --git a/test/Assembler/invalid-mdimportedentity-missing-scope.ll b/test/Assembler/invalid-diimportedentity-missing-scope.ll
index 06164e8..06164e8 100644
--- a/test/Assembler/invalid-mdimportedentity-missing-scope.ll
+++ b/test/Assembler/invalid-diimportedentity-missing-scope.ll
diff --git a/test/Assembler/invalid-mdimportedentity-missing-tag.ll b/test/Assembler/invalid-diimportedentity-missing-tag.ll
index 996b1ff..996b1ff 100644
--- a/test/Assembler/invalid-mdimportedentity-missing-tag.ll
+++ b/test/Assembler/invalid-diimportedentity-missing-tag.ll
diff --git a/test/Assembler/invalid-mdlexicalblock-missing-scope.ll b/test/Assembler/invalid-dilexicalblock-missing-scope.ll
index 6108e69..6108e69 100644
--- a/test/Assembler/invalid-mdlexicalblock-missing-scope.ll
+++ b/test/Assembler/invalid-dilexicalblock-missing-scope.ll
diff --git a/test/Assembler/invalid-mdlexicalblock-null-scope.ll b/test/Assembler/invalid-dilexicalblock-null-scope.ll
index 7457b60..7457b60 100644
--- a/test/Assembler/invalid-mdlexicalblock-null-scope.ll
+++ b/test/Assembler/invalid-dilexicalblock-null-scope.ll
diff --git a/test/Assembler/invalid-mdlexicalblockfile-missing-discriminator.ll b/test/Assembler/invalid-dilexicalblockfile-missing-discriminator.ll
index bd8395c..bd8395c 100644
--- a/test/Assembler/invalid-mdlexicalblockfile-missing-discriminator.ll
+++ b/test/Assembler/invalid-dilexicalblockfile-missing-discriminator.ll
diff --git a/test/Assembler/invalid-mdlexicalblockfile-missing-scope.ll b/test/Assembler/invalid-dilexicalblockfile-missing-scope.ll
index fd03750..fd03750 100644
--- a/test/Assembler/invalid-mdlexicalblockfile-missing-scope.ll
+++ b/test/Assembler/invalid-dilexicalblockfile-missing-scope.ll
diff --git a/test/Assembler/invalid-mdlexicalblockfile-null-scope.ll b/test/Assembler/invalid-dilexicalblockfile-null-scope.ll
index ae1a5d4..ae1a5d4 100644
--- a/test/Assembler/invalid-mdlexicalblockfile-null-scope.ll
+++ b/test/Assembler/invalid-dilexicalblockfile-null-scope.ll
diff --git a/test/Assembler/invalid-dilocalvariable-arg-large.ll b/test/Assembler/invalid-dilocalvariable-arg-large.ll
new file mode 100644
index 0000000..d62da60
--- /dev/null
+++ b/test/Assembler/invalid-dilocalvariable-arg-large.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+!0 = !DILocalVariable(tag: DW_TAG_arg_variable, scope: !{}, arg: 65535)
+; CHECK: <stdin>:[[@LINE+1]]:66: error: value for 'arg' too large, limit is 65535
+!1 = !DILocalVariable(tag: DW_TAG_arg_variable, scope: !{}, arg: 65536)
diff --git a/test/Assembler/invalid-dilocalvariable-arg-negative.ll b/test/Assembler/invalid-dilocalvariable-arg-negative.ll
new file mode 100644
index 0000000..08e370a
--- /dev/null
+++ b/test/Assembler/invalid-dilocalvariable-arg-negative.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+!0 = !DILocalVariable(tag: DW_TAG_arg_variable, scope: !{}, arg: 0)
+; CHECK: <stdin>:[[@LINE+1]]:66: error: expected unsigned integer
+!1 = !DILocalVariable(tag: DW_TAG_arg_variable, scope: !{}, arg: -1)
diff --git a/test/Assembler/invalid-mdlocalvariable-missing-scope.ll b/test/Assembler/invalid-dilocalvariable-missing-scope.ll
index 45dcad7..45dcad7 100644
--- a/test/Assembler/invalid-mdlocalvariable-missing-scope.ll
+++ b/test/Assembler/invalid-dilocalvariable-missing-scope.ll
diff --git a/test/Assembler/invalid-mdlocalvariable-missing-tag.ll b/test/Assembler/invalid-dilocalvariable-missing-tag.ll
index 18062ed..18062ed 100644
--- a/test/Assembler/invalid-mdlocalvariable-missing-tag.ll
+++ b/test/Assembler/invalid-dilocalvariable-missing-tag.ll
diff --git a/test/Assembler/invalid-mdlocalvariable-null-scope.ll b/test/Assembler/invalid-dilocalvariable-null-scope.ll
index 859412a..859412a 100644
--- a/test/Assembler/invalid-mdlocalvariable-null-scope.ll
+++ b/test/Assembler/invalid-dilocalvariable-null-scope.ll
diff --git a/test/Assembler/invalid-mdlocation-field-bad.ll b/test/Assembler/invalid-dilocation-field-bad.ll
index e68aa45..e68aa45 100644
--- a/test/Assembler/invalid-mdlocation-field-bad.ll
+++ b/test/Assembler/invalid-dilocation-field-bad.ll
diff --git a/test/Assembler/invalid-mdlocation-field-twice.ll b/test/Assembler/invalid-dilocation-field-twice.ll
index d1dab68..d1dab68 100644
--- a/test/Assembler/invalid-mdlocation-field-twice.ll
+++ b/test/Assembler/invalid-dilocation-field-twice.ll
diff --git a/test/Assembler/invalid-mdlocation-missing-scope-2.ll b/test/Assembler/invalid-dilocation-missing-scope-2.ll
index 380e3fd..380e3fd 100644
--- a/test/Assembler/invalid-mdlocation-missing-scope-2.ll
+++ b/test/Assembler/invalid-dilocation-missing-scope-2.ll
diff --git a/test/Assembler/invalid-mdlocation-missing-scope.ll b/test/Assembler/invalid-dilocation-missing-scope.ll
index fb0cd86..fb0cd86 100644
--- a/test/Assembler/invalid-mdlocation-missing-scope.ll
+++ b/test/Assembler/invalid-dilocation-missing-scope.ll
diff --git a/test/Assembler/invalid-mdlocation-null-scope.ll b/test/Assembler/invalid-dilocation-null-scope.ll
index 38c59e1..38c59e1 100644
--- a/test/Assembler/invalid-mdlocation-null-scope.ll
+++ b/test/Assembler/invalid-dilocation-null-scope.ll
diff --git a/test/Assembler/invalid-mdlocation-overflow-column.ll b/test/Assembler/invalid-dilocation-overflow-column.ll
index 043f84d..043f84d 100644
--- a/test/Assembler/invalid-mdlocation-overflow-column.ll
+++ b/test/Assembler/invalid-dilocation-overflow-column.ll
diff --git a/test/Assembler/invalid-mdlocation-overflow-line.ll b/test/Assembler/invalid-dilocation-overflow-line.ll
index 7a67742..7a67742 100644
--- a/test/Assembler/invalid-mdlocation-overflow-line.ll
+++ b/test/Assembler/invalid-dilocation-overflow-line.ll
diff --git a/test/Assembler/invalid-mdnamespace-missing-namespace.ll b/test/Assembler/invalid-dinamespace-missing-namespace.ll
index c29391b..c29391b 100644
--- a/test/Assembler/invalid-mdnamespace-missing-namespace.ll
+++ b/test/Assembler/invalid-dinamespace-missing-namespace.ll
diff --git a/test/Assembler/invalid-mdsubrange-count-large.ll b/test/Assembler/invalid-disubrange-count-large.ll
index 003274f..003274f 100644
--- a/test/Assembler/invalid-mdsubrange-count-large.ll
+++ b/test/Assembler/invalid-disubrange-count-large.ll
diff --git a/test/Assembler/invalid-mdsubrange-count-missing.ll b/test/Assembler/invalid-disubrange-count-missing.ll
index 8fc4487..8fc4487 100644
--- a/test/Assembler/invalid-mdsubrange-count-missing.ll
+++ b/test/Assembler/invalid-disubrange-count-missing.ll
diff --git a/test/Assembler/invalid-mdsubrange-count-negative.ll b/test/Assembler/invalid-disubrange-count-negative.ll
index f2ad4c3..f2ad4c3 100644
--- a/test/Assembler/invalid-mdsubrange-count-negative.ll
+++ b/test/Assembler/invalid-disubrange-count-negative.ll
diff --git a/test/Assembler/invalid-mdsubrange-lowerBound-max.ll b/test/Assembler/invalid-disubrange-lowerBound-max.ll
index e163dc4..e163dc4 100644
--- a/test/Assembler/invalid-mdsubrange-lowerBound-max.ll
+++ b/test/Assembler/invalid-disubrange-lowerBound-max.ll
diff --git a/test/Assembler/invalid-mdsubrange-lowerBound-min.ll b/test/Assembler/invalid-disubrange-lowerBound-min.ll
index 1dc3a14..1dc3a14 100644
--- a/test/Assembler/invalid-mdsubrange-lowerBound-min.ll
+++ b/test/Assembler/invalid-disubrange-lowerBound-min.ll
diff --git a/test/Assembler/invalid-mdsubroutinetype-missing-types.ll b/test/Assembler/invalid-disubroutinetype-missing-types.ll
index 086e5cc..086e5cc 100644
--- a/test/Assembler/invalid-mdsubroutinetype-missing-types.ll
+++ b/test/Assembler/invalid-disubroutinetype-missing-types.ll
diff --git a/test/Assembler/invalid-mdtemplatetypeparameter-missing-type.ll b/test/Assembler/invalid-ditemplatetypeparameter-missing-type.ll
index 797abd2..797abd2 100644
--- a/test/Assembler/invalid-mdtemplatetypeparameter-missing-type.ll
+++ b/test/Assembler/invalid-ditemplatetypeparameter-missing-type.ll
diff --git a/test/Assembler/invalid-mdtemplatevalueparameter-missing-value.ll b/test/Assembler/invalid-ditemplatevalueparameter-missing-value.ll
index 883bea1..883bea1 100644
--- a/test/Assembler/invalid-mdtemplatevalueparameter-missing-value.ll
+++ b/test/Assembler/invalid-ditemplatevalueparameter-missing-value.ll
diff --git a/test/Assembler/metadata.ll b/test/Assembler/metadata.ll
index 4fb8851..052ac1b 100644
--- a/test/Assembler/metadata.ll
+++ b/test/Assembler/metadata.ll
@@ -1,7 +1,7 @@
; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
; RUN: verify-uselistorder %s
-; CHECK: @test
+; CHECK-LABEL: @test
; CHECK: ret void, !bar !1, !foo !0
define void @test() {
add i32 2, 1, !bar !0
@@ -11,17 +11,24 @@ define void @test() {
ret void, !foo !0, !bar !1
-; CHECK: define void @test2() !foo !2 !baz !3
+; CHECK-LABEL: define void @test2() !foo !2 !baz !3
define void @test2() !foo !2 !baz !3 {
-; CHECK: define void @test3() !bar !3
+; CHECK-LABEL: define void @test3() !bar !3
; CHECK: unreachable, !bar !4
define void @test3() !bar !3 {
unreachable, !bar !4
+; CHECK-LABEL: define void @test_attachment_name() {
+; CHECK: unreachable, !\342abc !4
+define void @test_attachment_name() {
+ ;; Escape the first character when printing text IR, since it's a digit
+ unreachable, !\34\32abc !4
!0 = !DILocation(line: 662302, column: 26, scope: !1)
!1 = !DISubprogram(name: "foo")
!2 = distinct !{}
diff --git a/test/Bitcode/Inputs/invalid-alias-type-mismatch.bc b/test/Bitcode/Inputs/invalid-alias-type-mismatch.bc
new file mode 100644
index 0000000..5c42989
--- /dev/null
+++ b/test/Bitcode/Inputs/invalid-alias-type-mismatch.bc
Binary files differ
diff --git a/test/Bitcode/Inputs/invalid-metadata-not-followed-named-node.bc b/test/Bitcode/Inputs/invalid-metadata-not-followed-named-node.bc
new file mode 100644
index 0000000..42a2c3e
--- /dev/null
+++ b/test/Bitcode/Inputs/invalid-metadata-not-followed-named-node.bc
Binary files differ
diff --git a/test/Bitcode/Inputs/invalid-vector-length.bc b/test/Bitcode/Inputs/invalid-vector-length.bc
new file mode 100644
index 0000000..269df83
--- /dev/null
+++ b/test/Bitcode/Inputs/invalid-vector-length.bc
Binary files differ
diff --git a/test/Bitcode/invalid.test b/test/Bitcode/invalid.test
index eb7f979..0aab553 100644
--- a/test/Bitcode/invalid.test
+++ b/test/Bitcode/invalid.test
@@ -187,3 +187,18 @@ RUN: not llvm-dis -disable-output %p/Inputs/invalid-array-operand-encoding.bc 2>
RUN: FileCheck --check-prefix=ARRAY-OP-ENC %s
ARRAY-OP-ENC: Array element type has to be an encoding of a type
+RUN: not llvm-dis -disable-output %p/Inputs/invalid-metadata-not-followed-named-node.bc 2>&1 | \
+RUN: FileCheck --check-prefix=META-NOT-FOLLOWED-BY-NAMED-META %s
+RUN: not llvm-dis -disable-output %p/Inputs/invalid-vector-length.bc 2>&1 | \
+RUN: FileCheck --check-prefix=VECTOR-LENGTH %s
+VECTOR-LENGTH: Invalid vector length
+RUN: not llvm-dis -disable-output %p/Inputs/invalid-alias-type-mismatch.bc 2>&1 | \
+RUN: FileCheck --check-prefix=ALIAS-TYPE-MISMATCH %s
+ALIAS-TYPE-MISMATCH: Alias and aliasee types don't match
diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll
index ff18f73..11228c7 100644
--- a/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -287,3 +287,43 @@
%code1.i.i.phi.trans.insert = getelementptr inbounds %str1, %str1* %0, i64 0, i32 0, i32 0, i64 16
br label %sw.bb.i.i
+; CHECK-LABEL: select_and
+define i64 @select_and(i32 %v1, i32 %v2, i64 %a, i64 %b) {
+; CHECK: cmp
+; CHECK: ccmp{{.*}}, #0, ne
+; CHECK: csel{{.*}}, lt
+ %1 = icmp slt i32 %v1, %v2
+ %2 = icmp ne i32 5, %v2
+ %3 = and i1 %1, %2
+ %sel = select i1 %3, i64 %a, i64 %b
+ ret i64 %sel
+; CHECK-LABEL: select_or
+define i64 @select_or(i32 %v1, i32 %v2, i64 %a, i64 %b) {
+; CHECK: cmp
+; CHECK: ccmp{{.*}}, #8, eq
+; CHECK: csel{{.*}}, lt
+ %1 = icmp slt i32 %v1, %v2
+ %2 = icmp ne i32 5, %v2
+ %3 = or i1 %1, %2
+ %sel = select i1 %3, i64 %a, i64 %b
+ ret i64 %sel
+; CHECK-LABEL: select_complicated
+define i16 @select_complicated(double %v1, double %v2, i16 %a, i16 %b) {
+; CHECK: fcmp
+; CHECK: fccmp{{.*}}, #4, ne
+; CHECK: fccmp{{.*}}, #1, ne
+; CHECK: fccmp{{.*}}, #4, vc
+; CEHCK: csel{{.*}}, eq
+ %1 = fcmp one double %v1, %v2
+ %2 = fcmp oeq double %v2, 13.0
+ %3 = fcmp oeq double %v1, 42.0
+ %or0 = or i1 %2, %3
+ %or1 = or i1 %1, %or0
+ %sel = select i1 %or1, i16 %a, i16 %b
+ ret i16 %sel
diff --git a/test/CodeGen/AArch64/arm64-named-reg-alloc.ll b/test/CodeGen/AArch64/arm64-named-reg-alloc.ll
index 0c56454..5d48c17 100644
--- a/test/CodeGen/AArch64/arm64-named-reg-alloc.ll
+++ b/test/CodeGen/AArch64/arm64-named-reg-alloc.ll
@@ -4,7 +4,7 @@
define i32 @get_stack() nounwind {
; FIXME: Include an allocatable-specific error message
-; CHECK: Invalid register name global variable
+; CHECK: Invalid register name "x5".
%sp = call i32 @llvm.read_register.i32(metadata !0)
ret i32 %sp
diff --git a/test/CodeGen/AArch64/arm64-named-reg-notareg.ll b/test/CodeGen/AArch64/arm64-named-reg-notareg.ll
index 759bc15..8a5fd6f 100644
--- a/test/CodeGen/AArch64/arm64-named-reg-notareg.ll
+++ b/test/CodeGen/AArch64/arm64-named-reg-notareg.ll
@@ -3,7 +3,7 @@
define i32 @get_stack() nounwind {
-; CHECK: Invalid register name global variable
+; CHECK: Invalid register name "notareg".
%sp = call i32 @llvm.read_register.i32(metadata !0)
ret i32 %sp
diff --git a/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll b/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
new file mode 100644
index 0000000..e83cbab
--- /dev/null
+++ b/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
@@ -0,0 +1,74 @@
+; RUN: llc -mtriple=aarch64-apple-ios -asm-verbose=false -aarch64-collect-loh=false \
+; RUN: -O1 -global-merge-group-by-use -global-merge-ignore-single-use \
+; RUN: %s -o - | FileCheck %s
+; Check that, at -O1, we only merge globals used in minsize functions.
+; We assume that globals of the same size aren't reordered inside a set.
+; We use -global-merge-ignore-single-use, and thus only expect one merged set.
+@m1 = internal global i32 0, align 4
+@n1 = internal global i32 0, align 4
+define void @f1(i32 %a1, i32 %a2) minsize nounwind {
+; CHECK-NEXT: adrp x8, [[SET:__MergedGlobals]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8]
+; CHECK-NEXT: ret
+ store i32 %a1, i32* @m1, align 4
+ store i32 %a2, i32* @n1, align 4
+ ret void
+@m2 = internal global i32 0, align 4
+@n2 = internal global i32 0, align 4
+define void @f2(i32 %a1, i32 %a2) nounwind {
+; CHECK-NEXT: adrp x8, _m2@PAGE
+; CHECK-NEXT: adrp x9, _n2@PAGE
+; CHECK-NEXT: str w0, [x8, _m2@PAGEOFF]
+; CHECK-NEXT: str w1, [x9, _n2@PAGEOFF]
+; CHECK-NEXT: ret
+ store i32 %a1, i32* @m2, align 4
+ store i32 %a2, i32* @n2, align 4
+ ret void
+; If we have use sets partially overlapping between a minsize and a non-minsize
+; function, explicitly check that we only consider the globals used in the
+; minsize function for merging.
+@m3 = internal global i32 0, align 4
+@n3 = internal global i32 0, align 4
+define void @f3(i32 %a1, i32 %a2) minsize nounwind {
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8, #8]
+; CHECK-NEXT: ret
+ store i32 %a1, i32* @m3, align 4
+ store i32 %a2, i32* @n3, align 4
+ ret void
+@n4 = internal global i32 0, align 4
+define void @f4(i32 %a1, i32 %a2) nounwind {
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: adrp x9, _n4@PAGE
+; CHECK-NEXT: str w0, [x8, #8]
+; CHECK-NEXT: str w1, [x9, _n4@PAGEOFF]
+; CHECK-NEXT: ret
+ store i32 %a1, i32* @m3, align 4
+ store i32 %a2, i32* @n4, align 4
+ ret void
+; CHECK-DAG: .zerofill __DATA,__bss,[[SET]],16,3
+; CHECK-DAG: .zerofill __DATA,__bss,_m2,4,2
+; CHECK-DAG: .zerofill __DATA,__bss,_n2,4,2
+; CHECK-DAG: .zerofill __DATA,__bss,_n4,4,2
diff --git a/test/CodeGen/AArch64/minmax.ll b/test/CodeGen/AArch64/minmax.ll
index a6b5ade..df4912c 100644
--- a/test/CodeGen/AArch64/minmax.ll
+++ b/test/CodeGen/AArch64/minmax.ll
@@ -94,3 +94,14 @@ define <16 x i32> @t11(<16 x i32> %a, <16 x i32> %b) {
%t2 = select <16 x i1> %t1, <16 x i32> %a, <16 x i32> %b
ret <16 x i32> %t2
+; CHECK-NOT: umin
+; The icmp is used by two instructions, so don't produce a umin node.
+define <16 x i8> @t12(<16 x i8> %a, <16 x i8> %b) {
+ %t1 = icmp ugt <16 x i8> %b, %a
+ %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b
+ %t3 = zext <16 x i1> %t1 to <16 x i8>
+ %t4 = add <16 x i8> %t3, %t2
+ ret <16 x i8> %t4
diff --git a/test/CodeGen/AArch64/special-reg.ll b/test/CodeGen/AArch64/special-reg.ll
new file mode 100644
index 0000000..91c3215
--- /dev/null
+++ b/test/CodeGen/AArch64/special-reg.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=aarch64-none-eabi -mcpu=cortex-a57 2>&1 | FileCheck %s
+define i64 @read_encoded_register() nounwind {
+; CHECK-LABEL: read_encoded_register:
+; CHECK: mrs x0, S1_2_C3_C4_5
+ %reg = call i64 @llvm.read_register.i64(metadata !0)
+ ret i64 %reg
+define i64 @read_daif() nounwind {
+; CHECK-LABEL: read_daif:
+; CHECK: mrs x0, DAIF
+ %reg = call i64 @llvm.read_register.i64(metadata !1)
+ ret i64 %reg
+define void @write_encoded_register(i64 %x) nounwind {
+; CHECK-LABEL: write_encoded_register:
+; CHECK: msr S1_2_C3_C4_5, x0
+ call void @llvm.write_register.i64(metadata !0, i64 %x)
+ ret void
+define void @write_daif(i64 %x) nounwind {
+; CHECK-LABEL: write_daif:
+; CHECK: msr DAIF, x0
+ call void @llvm.write_register.i64(metadata !1, i64 %x)
+ ret void
+define void @write_daifset() nounwind {
+; CHECK-LABEL: write_daifset:
+; CHECK: msr DAIFSET, #2
+ call void @llvm.write_register.i64(metadata !2, i64 2)
+ ret void
+declare i64 @llvm.read_register.i64(metadata) nounwind
+declare void @llvm.write_register.i64(metadata, i64) nounwind
+!0 = !{!"1:2:3:4:5"}
+!1 = !{!"daif"}
+!2 = !{!"daifset"}
diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll
index db5007b..86287c1 100644
--- a/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -664,7 +664,7 @@ define void @test_atomic_load_min_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
@@ -782,7 +782,7 @@ define void @test_atomic_load_max_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK: ldrexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
@@ -900,7 +900,7 @@ define void @test_atomic_load_umin_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
@@ -1018,7 +1018,7 @@ define void @test_atomic_load_umax_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
@@ -1146,10 +1146,12 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
; function there.
; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1
-; CHECK-LE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
+; CHECK-ARM-LE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
+; CHECK-THUMB-LE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_HI]], [[MISMATCH_LO]]
; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1
; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
-; CHECK-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]]
+; CHECK-ARM-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]]
+; CHECK-THUMB-BE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_LO]], [[MISMATCH_HI]]
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
; As above, r2, r3 is a reasonable guess.
diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll
index 1982fa9..e9de52a 100644
--- a/test/CodeGen/ARM/build-attributes.ll
+++ b/test/CodeGen/ARM/build-attributes.ll
@@ -923,7 +923,7 @@
; CORTEX-M4-SOFT: .eabi_attribute 7, 77
; CORTEX-M4-SOFT: .eabi_attribute 8, 0
; CORTEX-M4-SOFT: .eabi_attribute 9, 2
-; CORTEX-M4-SOFT: .fpu vfpv4-d16
+; CORTEX-M4-SOFT: .fpu fpv4-sp-d16
; CORTEX-M4-SOFT-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-M4-SOFT: .eabi_attribute 20, 1
@@ -953,7 +953,7 @@
; CORTEX-M4-HARD: .eabi_attribute 7, 77
; CORTEX-M4-HARD: .eabi_attribute 8, 0
; CORTEX-M4-HARD: .eabi_attribute 9, 2
-; CORTEX-M4-HARD: .fpu vfpv4-d16
+; CORTEX-M4-HARD: .fpu fpv4-sp-d16
; CORTEX-M4-HARD-NOT: .eabi_attribute 19
;; We default to IEEE 754 compliance
; CORTEX-M4-HARD: .eabi_attribute 20, 1
@@ -984,7 +984,7 @@
; CORTEX-M7: .eabi_attribute 8, 0
; CORTEX-M7: .eabi_attribute 9, 2
-; CORTEX-M7-SINGLE: .fpu fpv5-d16
+; CORTEX-M7-SINGLE: .fpu fpv5-sp-d16
; CORTEX-M7-DOUBLE: .fpu fpv5-d16
; CORTEX-M7: .eabi_attribute 17, 1
; CORTEX-M7-NOT: .eabi_attribute 19
diff --git a/test/CodeGen/ARM/ifcvt-callback.ll b/test/CodeGen/ARM/ifcvt-callback.ll
new file mode 100644
index 0000000..62a66e7
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt-callback.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march thumb %s -o - | FileCheck %s
+; This test checks that if-conversion pass is unconditionally added to the pass
+; pipeline and is conditionally executed based on the per-function target-cpu
+; attribute.
+; CHECK: ite eq
+define i32 @test_ifcvt(i32 %a, i32 %b) #0 {
+ %tmp2 = icmp eq i32 %a, 0
+ br i1 %tmp2, label %cond_false, label %cond_true
+ %tmp5 = add i32 %b, 1
+ ret i32 %tmp5
+ %tmp7 = add i32 %b, -1
+ ret i32 %tmp7
+attributes #0 = { "target-cpu"="cortex-a8" }
diff --git a/test/CodeGen/ARM/jump-table-islands-split.ll b/test/CodeGen/ARM/jump-table-islands-split.ll
new file mode 100644
index 0000000..deba21b
--- /dev/null
+++ b/test/CodeGen/ARM/jump-table-islands-split.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=thumbv7s-apple-ios8.0 -o - %s | FileCheck %s
+declare void @foo(double)
+declare i32 @llvm.arm.space(i32, i32)
+; The constpool entry used to call @foo should be directly between where we want
+; the tbb and its table. Fortunately, the flow is simple enough that we can
+; eliminate the entry calculation (ADD) and use the ADR as the base.
+; I'm hoping this won't be fragile, but if it does break the most likely fix is
+; adjusting the @llvm.arm.space call slightly. If this happens too many times
+; the test should probably be removed.
+define i32 @test_jumptable_not_adjacent(i1 %tst, i32 %sw, i32 %l) {
+; CHECK-LABEL: test_jumptable_not_adjacent:
+; CHECK: vldr {{d[0-9]+}}, [[DBL_CONST:LCPI[0-9]+_[0-9]+]]
+; [...]
+; CHECK: adr.w r[[BASE:[0-9]+]], [[JUMP_TABLE:LJTI[0-9]+_[0-9]+]]
+; CHECK: [[TBB_KEY:LCPI[0-9]+_[0-9]+]]:
+; CHECK-NEXT: tbb [r[[BASE]], {{r[0-9]+}}]
+; CHECK: .long
+; CHECK: .long
+; CHECK: .byte (LBB{{[0-9]+}}_{{[0-9]+}}-([[TBB_KEY]]+4)
+ br label %complex
+ call void @foo(double 12345.0)
+ call i32 @llvm.arm.space(i32 970, i32 undef)
+ switch i32 %sw, label %second [ i32 0, label %other
+ i32 1, label %third
+ i32 2, label %end
+ i32 3, label %other ]
+ ret i32 43
+ ret i32 0
+ call void @bar()
+ unreachable
+ ret i32 42
+declare void @bar()
diff --git a/test/CodeGen/ARM/jump-table-islands.ll b/test/CodeGen/ARM/jump-table-islands.ll
new file mode 100644
index 0000000..6b4f174
--- /dev/null
+++ b/test/CodeGen/ARM/jump-table-islands.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple=armv7-apple-ios8.0 -o - %s | FileCheck %s
+%BigInt = type i5500
+define %BigInt @test_moved_jumptable(i1 %tst, i32 %sw, %BigInt %l) {
+; CHECK-LABEL: test_moved_jumptable:
+; CHECK: adr {{r[0-9]+}}, [[JUMP_TABLE:LJTI[0-9]+_[0-9]+]]
+; CHECK: b [[SKIP_TABLE:LBB[0-9]+_[0-9]+]]
+; CHECK: .data_region jt32
+; CHECK: .long LBB{{[0-9]+_[0-9]+}}-[[JUMP_TABLE]]
+; CHECK: add pc, {{r[0-9]+}}, {{r[0-9]+}}
+ br i1 %tst, label %simple, label %complex
+ br label %end
+ switch i32 %sw, label %simple [ i32 0, label %other
+ i32 1, label %third
+ i32 5, label %end
+ i32 6, label %other ]
+ ret %BigInt 0
+ call void @bar()
+ unreachable
+ %val = phi %BigInt [ %l, %complex ], [ -1, %simple ]
+ ret %BigInt %val
+declare void @bar()
diff --git a/test/CodeGen/ARM/jumptable-label.ll b/test/CodeGen/ARM/jumptable-label.ll
index 49d6986..2ba90dc 100644
--- a/test/CodeGen/ARM/jumptable-label.ll
+++ b/test/CodeGen/ARM/jumptable-label.ll
@@ -2,8 +2,8 @@
; test that we print the label of a bb that is only used in a jump table.
-; CHECK: .long LBB0_2
-; CHECK: LBB0_2:
+; CHECK: .long [[JUMPTABLE_DEST:LBB[0-9]+_[0-9]+]]
define i32 @calculate() {
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 7ce8468..a8070ea 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -92,6 +92,22 @@ entry:
ret void
+declare void @extfunc(i32, i32, i32, i32)
+; CHECK-LABEL: Func2:
+; A8: ldrd
+; A8: blx
+; A8: pop
+define void @Func2(i32* %p) {
+ %addr0 = getelementptr i32, i32* %p, i32 0
+ %addr1 = getelementptr i32, i32* %p, i32 1
+ %v0 = load i32, i32* %addr0
+ %v1 = load i32, i32* %addr1
+ ; try to force %v0/%v1 into non-adjacent registers
+ call void @extfunc(i32 %v0, i32 0, i32 0, i32 %v1)
+ ret void
declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
diff --git a/test/CodeGen/ARM/named-reg-alloc.ll b/test/CodeGen/ARM/named-reg-alloc.ll
index 380cf39..d41fa64 100644
--- a/test/CodeGen/ARM/named-reg-alloc.ll
+++ b/test/CodeGen/ARM/named-reg-alloc.ll
@@ -4,7 +4,7 @@
define i32 @get_stack() nounwind {
; FIXME: Include an allocatable-specific error message
-; CHECK: Invalid register name global variable
+; CHECK: Invalid register name "r5".
%sp = call i32 @llvm.read_register.i32(metadata !0)
ret i32 %sp
diff --git a/test/CodeGen/ARM/named-reg-notareg.ll b/test/CodeGen/ARM/named-reg-notareg.ll
index 3ac03f4..45cb38f 100644
--- a/test/CodeGen/ARM/named-reg-notareg.ll
+++ b/test/CodeGen/ARM/named-reg-notareg.ll
@@ -3,7 +3,7 @@
define i32 @get_stack() nounwind {
-; CHECK: Invalid register name global variable
+; CHECK: Invalid register name "notareg".
%sp = call i32 @llvm.read_register.i32(metadata !0)
ret i32 %sp
diff --git a/test/CodeGen/ARM/special-reg-acore.ll b/test/CodeGen/ARM/special-reg-acore.ll
new file mode 100644
index 0000000..3d65ff4
--- /dev/null
+++ b/test/CodeGen/ARM/special-reg-acore.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -mtriple=arm-none-eabi -mcpu=cortex-a8 2>&1 | FileCheck %s --check-prefix=ACORE
+; RUN: not llc < %s -mtriple=thumb-none-eabi -mcpu=cortex-m4 2>&1 | FileCheck %s --check-prefix=MCORE
+; MCORE: LLVM ERROR: Invalid register name "cpsr".
+define i32 @read_cpsr() nounwind {
+ ; ACORE-LABEL: read_cpsr:
+ ; ACORE: mrs r0, apsr
+ %reg = call i32 @llvm.read_register.i32(metadata !1)
+ ret i32 %reg
+define i32 @read_aclass_registers() nounwind {
+ ; ACORE-LABEL: read_aclass_registers:
+ ; ACORE: mrs r0, apsr
+ ; ACORE: mrs r1, spsr
+ %0 = call i32 @llvm.read_register.i32(metadata !0)
+ %1 = call i32 @llvm.read_register.i32(metadata !1)
+ %add1 = add i32 %1, %0
+ %2 = call i32 @llvm.read_register.i32(metadata !2)
+ %add2 = add i32 %add1, %2
+ ret i32 %add2
+define void @write_aclass_registers(i32 %x) nounwind {
+ ; ACORE-LABEL: write_aclass_registers:
+ ; ACORE: msr APSR_nzcvq, r0
+ ; ACORE: msr APSR_g, r0
+ ; ACORE: msr APSR_nzcvqg, r0
+ ; ACORE: msr CPSR_c, r0
+ ; ACORE: msr CPSR_x, r0
+ ; ACORE: msr APSR_g, r0
+ ; ACORE: msr APSR_nzcvq, r0
+ ; ACORE: msr CPSR_fsxc, r0
+ ; ACORE: msr SPSR_c, r0
+ ; ACORE: msr SPSR_x, r0
+ ; ACORE: msr SPSR_s, r0
+ ; ACORE: msr SPSR_f, r0
+ ; ACORE: msr SPSR_fsxc, r0
+ call void @llvm.write_register.i32(metadata !3, i32 %x)
+ call void @llvm.write_register.i32(metadata !4, i32 %x)
+ call void @llvm.write_register.i32(metadata !5, i32 %x)
+ call void @llvm.write_register.i32(metadata !6, i32 %x)
+ call void @llvm.write_register.i32(metadata !7, i32 %x)
+ call void @llvm.write_register.i32(metadata !8, i32 %x)
+ call void @llvm.write_register.i32(metadata !9, i32 %x)
+ call void @llvm.write_register.i32(metadata !10, i32 %x)
+ call void @llvm.write_register.i32(metadata !11, i32 %x)
+ call void @llvm.write_register.i32(metadata !12, i32 %x)
+ call void @llvm.write_register.i32(metadata !13, i32 %x)
+ call void @llvm.write_register.i32(metadata !14, i32 %x)
+ call void @llvm.write_register.i32(metadata !15, i32 %x)
+ ret void
+declare i32 @llvm.read_register.i32(metadata) nounwind
+declare void @llvm.write_register.i32(metadata, i32) nounwind
+!0 = !{!"apsr"}
+!1 = !{!"cpsr"}
+!2 = !{!"spsr"}
+!3 = !{!"apsr_nzcvq"}
+!4 = !{!"apsr_g"}
+!5 = !{!"apsr_nzcvqg"}
+!6 = !{!"cpsr_c"}
+!7 = !{!"cpsr_x"}
+!8 = !{!"cpsr_s"}
+!9 = !{!"cpsr_f"}
+!10 = !{!"cpsr_cxsf"}
+!11 = !{!"spsr_c"}
+!12 = !{!"spsr_x"}
+!13 = !{!"spsr_s"}
+!14 = !{!"spsr_f"}
+!15 = !{!"spsr_cxsf"}
diff --git a/test/CodeGen/ARM/special-reg-mcore.ll b/test/CodeGen/ARM/special-reg-mcore.ll
new file mode 100644
index 0000000..686da0f
--- /dev/null
+++ b/test/CodeGen/ARM/special-reg-mcore.ll
@@ -0,0 +1,143 @@
+; RUN: llc < %s -mtriple=thumb-none-eabi -mcpu=cortex-m4 2>&1 | FileCheck %s --check-prefix=MCORE
+; RUN: not llc < %s -mtriple=thumb-none-eabi -mcpu=cortex-m3 2>&1 | FileCheck %s --check-prefix=M3CORE
+; RUN: not llc < %s -mtriple=arm-none-eabi -mcpu=cortex-a8 2>&1 | FileCheck %s --check-prefix=ACORE
+; ACORE: LLVM ERROR: Invalid register name "control".
+; M3CORE: LLVM ERROR: Invalid register name "control".
+define i32 @read_mclass_registers() nounwind {
+ ; MCORE-LABEL: read_mclass_registers:
+ ; MCORE: mrs r0, apsr
+ ; MCORE: mrs r1, iapsr
+ ; MCORE: mrs r1, eapsr
+ ; MCORE: mrs r1, xpsr
+ ; MCORE: mrs r1, ipsr
+ ; MCORE: mrs r1, epsr
+ ; MCORE: mrs r1, iepsr
+ ; MCORE: mrs r1, msp
+ ; MCORE: mrs r1, psp
+ ; MCORE: mrs r1, primask
+ ; MCORE: mrs r1, basepri
+ ; MCORE: mrs r1, basepri_max
+ ; MCORE: mrs r1, faultmask
+ ; MCORE: mrs r1, control
+ %0 = call i32 @llvm.read_register.i32(metadata !0)
+ %1 = call i32 @llvm.read_register.i32(metadata !4)
+ %add1 = add i32 %1, %0
+ %2 = call i32 @llvm.read_register.i32(metadata !8)
+ %add2 = add i32 %add1, %2
+ %3 = call i32 @llvm.read_register.i32(metadata !12)
+ %add3 = add i32 %add2, %3
+ %4 = call i32 @llvm.read_register.i32(metadata !16)
+ %add4 = add i32 %add3, %4
+ %5 = call i32 @llvm.read_register.i32(metadata !17)
+ %add5 = add i32 %add4, %5
+ %6 = call i32 @llvm.read_register.i32(metadata !18)
+ %add6 = add i32 %add5, %6
+ %7 = call i32 @llvm.read_register.i32(metadata !19)
+ %add7 = add i32 %add6, %7
+ %8 = call i32 @llvm.read_register.i32(metadata !20)
+ %add8 = add i32 %add7, %8
+ %9 = call i32 @llvm.read_register.i32(metadata !21)
+ %add9 = add i32 %add8, %9
+ %10 = call i32 @llvm.read_register.i32(metadata !22)
+ %add10 = add i32 %add9, %10
+ %11 = call i32 @llvm.read_register.i32(metadata !23)
+ %add11 = add i32 %add10, %11
+ %12 = call i32 @llvm.read_register.i32(metadata !24)
+ %add12 = add i32 %add11, %12
+ %13 = call i32 @llvm.read_register.i32(metadata !25)
+ %add13 = add i32 %add12, %13
+ ret i32 %add13
+define void @write_mclass_registers(i32 %x) nounwind {
+ ; MCORE-LABEL: write_mclass_registers:
+ ; MCORE: msr apsr_nzcvqg, r0
+ ; MCORE: msr apsr_nzcvq, r0
+ ; MCORE: msr apsr_g, r0
+ ; MCORE: msr apsr_nzcvqg, r0
+ ; MCORE: msr iapsr_nzcvqg, r0
+ ; MCORE: msr iapsr_nzcvq, r0
+ ; MCORE: msr iapsr_g, r0
+ ; MCORE: msr iapsr_nzcvqg, r0
+ ; MCORE: msr eapsr_nzcvqg, r0
+ ; MCORE: msr eapsr_nzcvq, r0
+ ; MCORE: msr eapsr_g, r0
+ ; MCORE: msr eapsr_nzcvqg, r0
+ ; MCORE: msr xpsr_nzcvqg, r0
+ ; MCORE: msr xpsr_nzcvq, r0
+ ; MCORE: msr xpsr_g, r0
+ ; MCORE: msr xpsr_nzcvqg, r0
+ ; MCORE: msr ipsr, r0
+ ; MCORE: msr epsr, r0
+ ; MCORE: msr iepsr, r0
+ ; MCORE: msr msp, r0
+ ; MCORE: msr psp, r0
+ ; MCORE: msr primask, r0
+ ; MCORE: msr basepri, r0
+ ; MCORE: msr basepri_max, r0
+ ; MCORE: msr faultmask, r0
+ ; MCORE: msr control, r0
+ call void @llvm.write_register.i32(metadata !0, i32 %x)
+ call void @llvm.write_register.i32(metadata !1, i32 %x)
+ call void @llvm.write_register.i32(metadata !2, i32 %x)
+ call void @llvm.write_register.i32(metadata !3, i32 %x)
+ call void @llvm.write_register.i32(metadata !4, i32 %x)
+ call void @llvm.write_register.i32(metadata !5, i32 %x)
+ call void @llvm.write_register.i32(metadata !6, i32 %x)
+ call void @llvm.write_register.i32(metadata !7, i32 %x)
+ call void @llvm.write_register.i32(metadata !8, i32 %x)
+ call void @llvm.write_register.i32(metadata !9, i32 %x)
+ call void @llvm.write_register.i32(metadata !10, i32 %x)
+ call void @llvm.write_register.i32(metadata !11, i32 %x)
+ call void @llvm.write_register.i32(metadata !12, i32 %x)
+ call void @llvm.write_register.i32(metadata !13, i32 %x)
+ call void @llvm.write_register.i32(metadata !14, i32 %x)
+ call void @llvm.write_register.i32(metadata !15, i32 %x)
+ call void @llvm.write_register.i32(metadata !16, i32 %x)
+ call void @llvm.write_register.i32(metadata !17, i32 %x)
+ call void @llvm.write_register.i32(metadata !18, i32 %x)
+ call void @llvm.write_register.i32(metadata !19, i32 %x)
+ call void @llvm.write_register.i32(metadata !20, i32 %x)
+ call void @llvm.write_register.i32(metadata !21, i32 %x)
+ call void @llvm.write_register.i32(metadata !22, i32 %x)
+ call void @llvm.write_register.i32(metadata !23, i32 %x)
+ call void @llvm.write_register.i32(metadata !24, i32 %x)
+ call void @llvm.write_register.i32(metadata !25, i32 %x)
+ ret void
+declare i32 @llvm.read_register.i32(metadata) nounwind
+declare void @llvm.write_register.i32(metadata, i32) nounwind
+!0 = !{!"apsr"}
+!1 = !{!"apsr_nzcvq"}
+!2 = !{!"apsr_g"}
+!3 = !{!"apsr_nzcvqg"}
+!4 = !{!"iapsr"}
+!5 = !{!"iapsr_nzcvq"}
+!6 = !{!"iapsr_g"}
+!7 = !{!"iapsr_nzcvqg"}
+!8 = !{!"eapsr"}
+!9 = !{!"eapsr_nzcvq"}
+!10 = !{!"eapsr_g"}
+!11 = !{!"eapsr_nzcvqg"}
+!12 = !{!"xpsr"}
+!13 = !{!"xpsr_nzcvq"}
+!14 = !{!"xpsr_g"}
+!15 = !{!"xpsr_nzcvqg"}
+!16 = !{!"ipsr"}
+!17 = !{!"epsr"}
+!18 = !{!"iepsr"}
+!19 = !{!"msp"}
+!20 = !{!"psp"}
+!21 = !{!"primask"}
+!22 = !{!"basepri"}
+!23 = !{!"basepri_max"}
+!24 = !{!"faultmask"}
+!25 = !{!"control"}
diff --git a/test/CodeGen/ARM/special-reg.ll b/test/CodeGen/ARM/special-reg.ll
new file mode 100644
index 0000000..7ccb490
--- /dev/null
+++ b/test/CodeGen/ARM/special-reg.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -mtriple=arm-none-eabi -mcpu=cortex-a8 2>&1 | FileCheck %s --check-prefix=ARM --check-prefix=ACORE
+; RUN: llc < %s -mtriple=thumb-none-eabi -mcpu=cortex-m4 2>&1 | FileCheck %s --check-prefix=ARM --check-prefix=MCORE
+define i32 @read_i32_encoded_register() nounwind {
+; ARM-LABEL: read_i32_encoded_register:
+; ARM: mrc p1, #2, r0, c3, c4, #5
+ %reg = call i32 @llvm.read_register.i32(metadata !0)
+ ret i32 %reg
+define i64 @read_i64_encoded_register() nounwind {
+; ARM-LABEL: read_i64_encoded_register:
+; ARM: mrrc p1, #2, r0, r1, c3
+ %reg = call i64 @llvm.read_register.i64(metadata !1)
+ ret i64 %reg
+define i32 @read_apsr() nounwind {
+; ARM-LABEL: read_apsr:
+; ARM: mrs r0, apsr
+ %reg = call i32 @llvm.read_register.i32(metadata !2)
+ ret i32 %reg
+define i32 @read_fpscr() nounwind {
+; ARM-LABEL: read_fpscr:
+; ARM: vmrs r0, fpscr
+ %reg = call i32 @llvm.read_register.i32(metadata !3)
+ ret i32 %reg
+define void @write_i32_encoded_register(i32 %x) nounwind {
+; ARM-LABEL: write_i32_encoded_register:
+; ARM: mcr p1, #2, r0, c3, c4, #5
+ call void @llvm.write_register.i32(metadata !0, i32 %x)
+ ret void
+define void @write_i64_encoded_register(i64 %x) nounwind {
+; ARM-LABEL: write_i64_encoded_register:
+; ARM: mcrr p1, #2, r0, r1, c3
+ call void @llvm.write_register.i64(metadata !1, i64 %x)
+ ret void
+define void @write_apsr(i32 %x) nounwind {
+; ARM-LABEL: write_apsr:
+; ACORE: msr APSR_nzcvq, r0
+; MCORE: msr apsr_nzcvq, r0
+ call void @llvm.write_register.i32(metadata !4, i32 %x)
+ ret void
+define void @write_fpscr(i32 %x) nounwind {
+; ARM-LABEL: write_fpscr:
+; ARM: vmsr fpscr, r0
+ call void @llvm.write_register.i32(metadata !3, i32 %x)
+ ret void
+declare i32 @llvm.read_register.i32(metadata) nounwind
+declare i64 @llvm.read_register.i64(metadata) nounwind
+declare void @llvm.write_register.i32(metadata, i32) nounwind
+declare void @llvm.write_register.i64(metadata, i64) nounwind
+!0 = !{!"cp1:2:c3:c4:5"}
+!1 = !{!"cp1:2:c3"}
+!2 = !{!"apsr"}
+!3 = !{!"fpscr"}
+!4 = !{!"apsr_nzcvq"}
diff --git a/test/CodeGen/BPF/alu8.ll b/test/CodeGen/BPF/alu8.ll
index 0233225..c1c2bd2 100644
--- a/test/CodeGen/BPF/alu8.ll
+++ b/test/CodeGen/BPF/alu8.ll
@@ -1,5 +1,4 @@
-; RUN: llc -march=bpf -show-mc-encoding < %s | FileCheck %s
-; test little endian only for now
+; RUN: llc -march=bpfel -show-mc-encoding < %s | FileCheck %s
define i8 @mov(i8 %a, i8 %b) nounwind {
diff --git a/test/CodeGen/BPF/atomics.ll b/test/CodeGen/BPF/atomics.ll
index 2f9730d..a2e17d2 100644
--- a/test/CodeGen/BPF/atomics.ll
+++ b/test/CodeGen/BPF/atomics.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=bpf -verify-machineinstrs -show-mc-encoding | FileCheck %s
-; test little endian only for now
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck %s
; CHECK-LABEL: test_load_add_32
; CHECK: xadd32
diff --git a/test/CodeGen/BPF/basictest.ll b/test/CodeGen/BPF/basictest.ll
index 2a2d498..82feb43 100644
--- a/test/CodeGen/BPF/basictest.ll
+++ b/test/CodeGen/BPF/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=bpf | FileCheck %s
+; RUN: llc < %s -march=bpfel | FileCheck %s
define i32 @test0(i32 %X) {
%tmp.1 = add i32 %X, 1
diff --git a/test/CodeGen/BPF/cc_args.ll b/test/CodeGen/BPF/cc_args.ll
index 5085fe5..8e3f860 100644
--- a/test/CodeGen/BPF/cc_args.ll
+++ b/test/CodeGen/BPF/cc_args.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=bpf -show-mc-encoding | FileCheck %s
-; test little endian only for now
+; RUN: llc < %s -march=bpfel -show-mc-encoding | FileCheck %s
define void @test() #0 {
diff --git a/test/CodeGen/BPF/cc_args_be.ll b/test/CodeGen/BPF/cc_args_be.ll
new file mode 100644
index 0000000..59a7439
--- /dev/null
+++ b/test/CodeGen/BPF/cc_args_be.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -march=bpfeb -show-mc-encoding | FileCheck %s
+; test big endian
+define void @test() #0 {
+; CHECK: test:
+; CHECK: mov r1, 123 # encoding: [0xb7,0x10,0x00,0x00,0x00,0x00,0x00,0x7b]
+; CHECK: call f_i16
+ call void @f_i16(i16 123)
+; CHECK: mov r1, 12345678 # encoding: [0xb7,0x10,0x00,0x00,0x00,0xbc,0x61,0x4e]
+; CHECK: call f_i32
+ call void @f_i32(i32 12345678)
+; CHECK: ld_64 r1, 72623859790382856 # encoding: [0x18,0x10,0x00,0x00,0x05,0x06,0x07,0x08,0x00,0x00,0x00,0x00,0x01,0x02,0x03,0x04]
+; CHECK: call f_i64
+ call void @f_i64(i64 72623859790382856)
+; CHECK: mov r1, 1234
+; CHECK: mov r2, 5678
+; CHECK: call f_i32_i32
+ call void @f_i32_i32(i32 1234, i32 5678)
+; CHECK: mov r1, 2
+; CHECK: mov r2, 3
+; CHECK: mov r3, 4
+; CHECK: call f_i16_i32_i16
+ call void @f_i16_i32_i16(i16 2, i32 3, i16 4)
+; CHECK: mov r1, 5
+; CHECK: ld_64 r2, 7262385979038285
+; CHECK: mov r3, 6
+; CHECK: call f_i16_i64_i16
+ call void @f_i16_i64_i16(i16 5, i64 7262385979038285, i16 6)
+ ret void
+@g_i16 = common global i16 0, align 2
+@g_i32 = common global i32 0, align 2
+@g_i64 = common global i64 0, align 4
+define void @f_i16(i16 %a) #0 {
+; CHECK: f_i16:
+; CHECK: sth 0(r2), r1 # encoding: [0x6b,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ store volatile i16 %a, i16* @g_i16, align 2
+ ret void
+define void @f_i32(i32 %a) #0 {
+; CHECK: f_i32:
+; CHECK: sth 2(r2), r1 # encoding: [0x6b,0x21,0x00,0x02,0x00,0x00,0x00,0x00]
+; CHECK: sth 0(r2), r1 # encoding: [0x6b,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ store volatile i32 %a, i32* @g_i32, align 2
+ ret void
+define void @f_i64(i64 %a) #0 {
+; CHECK: f_i64:
+; CHECK: stw 4(r2), r1 # encoding: [0x63,0x21,0x00,0x04,0x00,0x00,0x00,0x00]
+; CHECK: stw 0(r2), r1
+ store volatile i64 %a, i64* @g_i64, align 2
+ ret void
+define void @f_i32_i32(i32 %a, i32 %b) #0 {
+; CHECK: f_i32_i32:
+; CHECK: stw 0(r3), r1
+ store volatile i32 %a, i32* @g_i32, align 4
+; CHECK: stw 0(r3), r2
+ store volatile i32 %b, i32* @g_i32, align 4
+ ret void
+define void @f_i16_i32_i16(i16 %a, i32 %b, i16 %c) #0 {
+; CHECK: f_i16_i32_i16:
+; CHECK: sth 0(r4), r1
+ store volatile i16 %a, i16* @g_i16, align 2
+; CHECK: stw 0(r1), r2
+ store volatile i32 %b, i32* @g_i32, align 4
+; CHECK: sth 0(r4), r3
+ store volatile i16 %c, i16* @g_i16, align 2
+ ret void
+define void @f_i16_i64_i16(i16 %a, i64 %b, i16 %c) #0 {
+; CHECK: f_i16_i64_i16:
+; CHECK: sth 0(r4), r1
+ store volatile i16 %a, i16* @g_i16, align 2
+; CHECK: std 0(r1), r2 # encoding: [0x7b,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+ store volatile i64 %b, i64* @g_i64, align 8
+; CHECK: sth 0(r4), r3
+ store volatile i16 %c, i16* @g_i16, align 2
+ ret void
diff --git a/test/CodeGen/BPF/cc_ret.ll b/test/CodeGen/BPF/cc_ret.ll
index e32b17b..0957492 100644
--- a/test/CodeGen/BPF/cc_ret.ll
+++ b/test/CodeGen/BPF/cc_ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=bpf | FileCheck %s
+; RUN: llc < %s -march=bpfel | FileCheck %s
define void @test() #0 {
diff --git a/test/CodeGen/BPF/ex1.ll b/test/CodeGen/BPF/ex1.ll
index be038e9..546e5d4 100644
--- a/test/CodeGen/BPF/ex1.ll
+++ b/test/CodeGen/BPF/ex1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=bpf | FileCheck %s
+; RUN: llc < %s -march=bpfel | FileCheck %s
%struct.bpf_context = type { i64, i64, i64, i64, i64, i64, i64 }
%struct.sk_buff = type { i64, i64, i64, i64, i64, i64, i64 }
diff --git a/test/CodeGen/BPF/intrinsics.ll b/test/CodeGen/BPF/intrinsics.ll
index 98b57de..483473e 100644
--- a/test/CodeGen/BPF/intrinsics.ll
+++ b/test/CodeGen/BPF/intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=bpf -show-mc-encoding | FileCheck %s
+; RUN: llc < %s -march=bpfel -show-mc-encoding | FileCheck %s
; Function Attrs: nounwind uwtable
define i32 @ld_b(i64 %foo, i64* nocapture %bar, i8* %ctx, i8* %ctx2) #0 {
diff --git a/test/CodeGen/BPF/load.ll b/test/CodeGen/BPF/load.ll
index 03fb17c..d4ba315 100644
--- a/test/CodeGen/BPF/load.ll
+++ b/test/CodeGen/BPF/load.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=bpf | FileCheck %s
+; RUN: llc < %s -march=bpfel | FileCheck %s
define i16 @am1(i16* %a) nounwind {
%1 = load i16, i16* %a
diff --git a/test/CodeGen/BPF/loops.ll b/test/CodeGen/BPF/loops.ll
index 4798d78..00be54b 100644
--- a/test/CodeGen/BPF/loops.ll
+++ b/test/CodeGen/BPF/loops.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=bpf | FileCheck %s
+; RUN: llc < %s -march=bpfel | FileCheck %s
define zeroext i16 @add(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
diff --git a/test/CodeGen/BPF/sanity.ll b/test/CodeGen/BPF/sanity.ll
index 09a6b65..7f0ef88 100644
--- a/test/CodeGen/BPF/sanity.ll
+++ b/test/CodeGen/BPF/sanity.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=bpf | FileCheck %s
+; RUN: llc < %s -march=bpfel | FileCheck %s
@foo_printf.fmt = private unnamed_addr constant [9 x i8] c"hello \0A\00", align 1
diff --git a/test/CodeGen/BPF/setcc.ll b/test/CodeGen/BPF/setcc.ll
index eabb6c9..f6c6db6 100644
--- a/test/CodeGen/BPF/setcc.ll
+++ b/test/CodeGen/BPF/setcc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=bpf < %s | FileCheck %s
+; RUN: llc -march=bpfel < %s | FileCheck %s
define i16 @sccweqand(i16 %a, i16 %b) nounwind {
%t1 = and i16 %a, %b
diff --git a/test/CodeGen/BPF/shifts.ll b/test/CodeGen/BPF/shifts.ll
index 898ae2d..cb000b9 100644
--- a/test/CodeGen/BPF/shifts.ll
+++ b/test/CodeGen/BPF/shifts.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=bpf -show-mc-encoding | FileCheck %s
-; test little endian only for now
+; RUN: llc < %s -march=bpfel -show-mc-encoding | FileCheck %s
define zeroext i8 @lshr8(i8 zeroext %a, i8 zeroext %cnt) nounwind readnone {
diff --git a/test/CodeGen/BPF/sockex2.ll b/test/CodeGen/BPF/sockex2.ll
index 6ae5e1c..d372a59 100644
--- a/test/CodeGen/BPF/sockex2.ll
+++ b/test/CodeGen/BPF/sockex2.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=bpf -show-mc-encoding | FileCheck %s
-; test little endian only for now
+; RUN: llc < %s -march=bpfel -show-mc-encoding | FileCheck %s
%struct.bpf_map_def = type { i32, i32, i32, i32 }
%struct.sk_buff = type opaque
diff --git a/test/CodeGen/Generic/stop-after.ll b/test/CodeGen/Generic/stop-after.ll
index 557e097..791378c 100644
--- a/test/CodeGen/Generic/stop-after.ll
+++ b/test/CodeGen/Generic/stop-after.ll
@@ -1,9 +1,10 @@
; RUN: llc < %s -debug-pass=Structure -stop-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP
; RUN: llc < %s -debug-pass=Structure -start-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START
-; STOP: -loop-reduce -print-module
+; STOP: -loop-reduce
; STOP: Loop Strength Reduction
; STOP-NEXT: Machine Function Analysis
+; STOP-NEXT: MIR Printing Pass
; START: -machine-branch-prob -gc-lowering
; START: FunctionPass Manager
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
index aea4ffe..1c470f6 100644
--- a/test/CodeGen/Hexagon/args.ll
+++ b/test/CodeGen/Hexagon/args.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched -disable-hexagon-misched < %s | FileCheck %s
-; CHECK: memw(r29{{ *}}+{{ *}}#0){{ *}}={{ *}}#7
-; CHECK: r1:0 = combine(#2, #1)
-; CHECK: r3:2 = combine(#4, #3)
+; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: r5:4 = combine(#6, #5)
+; CHECK: r3:2 = combine(#4, #3)
+; CHECK: r1:0 = combine(#2, #1)
+; CHECK: memw(r29{{ *}}+{{ *}}#0){{ *}}={{ *}}#7
define void @foo() nounwind {
diff --git a/test/CodeGen/Hexagon/calling-conv.ll b/test/CodeGen/Hexagon/calling-conv.ll
deleted file mode 100644
index 7133c1a..0000000
--- a/test/CodeGen/Hexagon/calling-conv.ll
+++ /dev/null
@@ -1,73 +0,0 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 <%s | \
-; RUN: FileCheck %s --check-prefix=CHECK-ONE
-; RUN: llc -march=hexagon -mcpu=hexagonv5 <%s | \
-; RUN: FileCheck %s --check-prefix=CHECK-TWO
-; RUN: llc -march=hexagon -mcpu=hexagonv5 <%s | \
-; RUN: FileCheck %s --check-prefix=CHECK-THREE
-%struct.test_struct = type { i32, i8, i64 }
-%struct.test_struct_long = type { i8, i64 }
-@mystruct = external global %struct.test_struct*, align 4
-; CHECK-ONE: memw(r29+#48) = r2
-; CHECK-TWO: memw(r29+#52) = r2
-; CHECK-THREE: memw(r29+#56) = r2
-; Function Attrs: nounwind
-define void @foo(%struct.test_struct* noalias sret %agg.result, i32 %a, i8 zeroext %c, %struct.test_struct* byval %s, %struct.test_struct_long* byval %t) #0 {
- %a.addr = alloca i32, align 4
- %c.addr = alloca i8, align 1
- %z = alloca i32, align 4
- %ret = alloca %struct.test_struct, align 8
- store i32 %a, i32* %a.addr, align 4
- store i8 %c, i8* %c.addr, align 1
- %0 = bitcast i32* %z to i8*
- call void @llvm.lifetime.start(i64 4, i8* %0) #1
- store i32 45, i32* %z, align 4
- %1 = bitcast %struct.test_struct* %ret to i8*
- call void @llvm.lifetime.start(i64 16, i8* %1) #1
- %2 = load i32, i32* %z, align 4
- %3 = load %struct.test_struct*, %struct.test_struct** @mystruct, align 4
- %4 = load %struct.test_struct*, %struct.test_struct** @mystruct, align 4
- %5 = load i8, i8* %c.addr, align 1
- %6 = load i32, i32* %a.addr, align 4
- %conv = sext i32 %6 to i64
- %add = add nsw i64 %conv, 1
- %7 = load i32, i32* %a.addr, align 4
- %add1 = add nsw i32 %7, 2
- %8 = load i32, i32* %a.addr, align 4
- %conv2 = sext i32 %8 to i64
- %add3 = add nsw i64 %conv2, 3
- %9 = load i8, i8* %c.addr, align 1
- %10 = load i8, i8* %c.addr, align 1
- %11 = load i8, i8* %c.addr, align 1
- %12 = load i32, i32* %z, align 4
- call void @bar(%struct.test_struct* sret %ret, i32 %2, %struct.test_struct* byval %3, %struct.test_struct* byval %4, i8 zeroext %5, i64 %add, i32 %add1, i64 %add3, i8 zeroext %9, i8 zeroext %10, i8 zeroext %11, i32 %12)
- %x = getelementptr inbounds %struct.test_struct, %struct.test_struct* %ret, i32 0, i32 0
- store i32 20, i32* %x, align 4
- %13 = bitcast %struct.test_struct* %agg.result to i8*
- %14 = bitcast %struct.test_struct* %ret to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %13, i8* %14, i32 16, i32 8, i1 false)
- %15 = bitcast %struct.test_struct* %ret to i8*
- call void @llvm.lifetime.end(i64 16, i8* %15) #1
- %16 = bitcast i32* %z to i8*
- call void @llvm.lifetime.end(i64 4, i8* %16) #1
- ret void
-; Function Attrs: nounwind
-declare void @llvm.lifetime.start(i64, i8* nocapture) #1
-declare void @bar(%struct.test_struct* sret, i32, %struct.test_struct* byval, %struct.test_struct* byval, i8 zeroext, i64, i32, i64, i8 zeroext, i8 zeroext, i8 zeroext, i32) #2
-; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
-; Function Attrs: nounwind
-declare void @llvm.lifetime.end(i64, i8* nocapture) #1
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv4" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind }
-attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv4" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Hexagon/cext-valid-packet1.ll b/test/CodeGen/Hexagon/cext-valid-packet1.ll
index a479d37..35e7b36 100644
--- a/test/CodeGen/Hexagon/cext-valid-packet1.ll
+++ b/test/CodeGen/Hexagon/cext-valid-packet1.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; Check that the packetizer generates valid packets with constant
; extended instructions.
diff --git a/test/CodeGen/Hexagon/cext-valid-packet2.ll b/test/CodeGen/Hexagon/cext-valid-packet2.ll
index 2eba743..c3a4915 100644
--- a/test/CodeGen/Hexagon/cext-valid-packet2.ll
+++ b/test/CodeGen/Hexagon/cext-valid-packet2.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; Check that the packetizer generates valid packets with constant
; extended add and base+offset store instructions.
diff --git a/test/CodeGen/Hexagon/compound.ll b/test/CodeGen/Hexagon/compound.ll
new file mode 100644
index 0000000..f8d36b8
--- /dev/null
+++ b/test/CodeGen/Hexagon/compound.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+; CHECK: p0 = cmp.gt(r0,#-1); if (!p0.new) jump:nt
+declare void @a()
+declare void @b()
+define void @foo(i32 %a) {
+%b = icmp sgt i32 %a, -1
+br i1 %b, label %x, label %y
+call void @a()
+ret void
+call void @b()
+ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/Hexagon/dualstore.ll b/test/CodeGen/Hexagon/dualstore.ll
index 33d9ce9..9f4569d 100644
--- a/test/CodeGen/Hexagon/dualstore.ll
+++ b/test/CodeGen/Hexagon/dualstore.ll
@@ -1,12 +1,11 @@
-; RUN: llc -march=hexagon -disable-hexagon-misched < %s | FileCheck %s
+; RUN: llc -march=hexagon -filetype=obj %s -o - | llvm-objdump -d - | FileCheck %s
; Check that we generate dual stores in one packet in V4
-; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}=
-; CHECK-NEXT: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}=
+; CHECK: 00 40 9f 52 529f4000
+; CHECK: 10 10 00 f0 f0001010
-define i32 @main(i32 %v, i32* %p1, i32* %p2) nounwind {
- store i32 %v, i32* %p1, align 4
- store i32 %v, i32* %p2, align 4
- ret i32 0
+define void @foo(i32* %a, i32* %b) {
+ store i32 0, i32* %a
+ store i32 0, i32* %b
+ ret void
diff --git a/test/CodeGen/Hexagon/duplex.ll b/test/CodeGen/Hexagon/duplex.ll
new file mode 100644
index 0000000..80fe61c
--- /dev/null
+++ b/test/CodeGen/Hexagon/duplex.ll
@@ -0,0 +1,7 @@
+; RUN: llc -march=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+; CHECK: c0 3f 00 48 48003fc0
+define i32 @foo() {
+ret i32 0
+} \ No newline at end of file
diff --git a/test/CodeGen/Hexagon/relax.ll b/test/CodeGen/Hexagon/relax.ll
new file mode 100644
index 0000000..9823d4d
--- /dev/null
+++ b/test/CodeGen/Hexagon/relax.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=hexagon -filetype=obj < %s | llvm-objdump -d -r - | FileCheck %s
+declare void @bar()
+define void @foo() {
+call void @bar()
+ret void
+; CHECK: { allocframe(#0) }
+; CHECK: { call 0 }
+; CHECK: 00000004: R_HEX_B22_PCREL
+; CHECK: { dealloc_return } \ No newline at end of file
diff --git a/test/CodeGen/Hexagon/sube.ll b/test/CodeGen/Hexagon/sube.ll
index 873f52b..9735894 100644
--- a/test/CodeGen/Hexagon/sube.ll
+++ b/test/CodeGen/Hexagon/sube.ll
@@ -3,10 +3,10 @@
; CHECK: r{{[0-9]+:[0-9]+}} = #1
; CHECK: r{{[0-9]+:[0-9]+}} = #0
; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
-; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
-; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}}, r{{[0-9]+}})
define void @check_sube_subc(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
diff --git a/test/CodeGen/MIR/lit.local.cfg b/test/CodeGen/MIR/lit.local.cfg
new file mode 100644
index 0000000..e69aa57
--- /dev/null
+++ b/test/CodeGen/MIR/lit.local.cfg
@@ -0,0 +1,2 @@
+config.suffixes = ['.mir']
diff --git a/test/CodeGen/MIR/llvm-ir-error-reported.mir b/test/CodeGen/MIR/llvm-ir-error-reported.mir
new file mode 100644
index 0000000..013b28c
--- /dev/null
+++ b/test/CodeGen/MIR/llvm-ir-error-reported.mir
@@ -0,0 +1,22 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures an error is reported if the embedded LLVM IR contains an
+# error.
+--- |
+ ; CHECK: [[@LINE+3]]:15: error: use of undefined value '%a'
+ define i32 @foo(i32 %x, i32 %y) {
+ %z = alloca i32, align 4
+ store i32 %a, i32* %z, align 4
+ br label %Test
+ Test:
+ %m = load i32, i32* %z, align 4
+ %cond = icmp eq i32 %y, %m
+ br i1 %cond, label %IfEqual, label %IfUnequal
+ IfEqual:
+ ret i32 1
+ IfUnequal:
+ ret i32 0
+ }
diff --git a/test/CodeGen/MIR/llvmIR.mir b/test/CodeGen/MIR/llvmIR.mir
new file mode 100644
index 0000000..7a7b46b
--- /dev/null
+++ b/test/CodeGen/MIR/llvmIR.mir
@@ -0,0 +1,32 @@
+# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the LLVM IR that's embedded with MIR is parsed
+# correctly.
+--- |
+ ; CHECK: define i32 @foo(i32 %x, i32 %y)
+ ; CHECK: %z = alloca i32, align 4
+ ; CHECK: store i32 %x, i32* %z, align 4
+ ; CHECK: br label %Test
+ ; CHECK: Test:
+ ; CHECK: %m = load i32, i32* %z, align 4
+ ; CHECK: %cond = icmp eq i32 %y, %m
+ ; CHECK: br i1 %cond, label %IfEqual, label %IfUnequal
+ ; CHECK: IfEqual:
+ ; CHECK: ret i32 1
+ ; CHECK: IfUnequal:
+ ; CHECK: ret i32 0
+ define i32 @foo(i32 %x, i32 %y) {
+ %z = alloca i32, align 4
+ store i32 %x, i32* %z, align 4
+ br label %Test
+ Test:
+ %m = load i32, i32* %z, align 4
+ %cond = icmp eq i32 %y, %m
+ br i1 %cond, label %IfEqual, label %IfUnequal
+ IfEqual:
+ ret i32 1
+ IfUnequal:
+ ret i32 0
+ }
diff --git a/test/CodeGen/MIR/llvmIRMissing.mir b/test/CodeGen/MIR/llvmIRMissing.mir
new file mode 100644
index 0000000..2acbcd1
--- /dev/null
+++ b/test/CodeGen/MIR/llvmIRMissing.mir
@@ -0,0 +1,5 @@
+# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s
+# This test ensures that the MIR parser accepts files without the LLVM IR.
diff --git a/test/CodeGen/MIR/machine-function-missing-name.mir b/test/CodeGen/MIR/machine-function-missing-name.mir
new file mode 100644
index 0000000..54668f1
--- /dev/null
+++ b/test/CodeGen/MIR/machine-function-missing-name.mir
@@ -0,0 +1,22 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that an error is reported when a machine function doesn't
+# have a name attribute.
+--- |
+ define i32 @foo() {
+ ret i32 0
+ }
+ define i32 @bar() {
+ ret i32 0
+ }
+# CHECK: [[@LINE+1]]:1: error: missing required key 'name'
+nme: foo
+name: bar
diff --git a/test/CodeGen/MIR/machine-function.mir b/test/CodeGen/MIR/machine-function.mir
new file mode 100644
index 0000000..679bfd2
--- /dev/null
+++ b/test/CodeGen/MIR/machine-function.mir
@@ -0,0 +1,24 @@
+# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses machine functions correctly.
+--- |
+ define i32 @foo() {
+ ret i32 0
+ }
+ define i32 @bar() {
+ ret i32 0
+ }
+# CHECK: name: foo
+# CHECK-NEXT: ...
+name: foo
+# CHECK: name: bar
+# CHECK-NEXT: ...
+name: bar
diff --git a/test/CodeGen/Mips/Fast-ISel/bswap1.ll b/test/CodeGen/Mips/Fast-ISel/bswap1.ll
new file mode 100644
index 0000000..8ac9753
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/bswap1.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 -relocation-model=pic \
+; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=32R1
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 -relocation-model=pic \
+; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=32R2
+@a = global i16 -21829, align 2
+@b = global i32 -1430532899, align 4
+@a1 = common global i16 0, align 2
+@b1 = common global i32 0, align 4
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+define void @b16() {
+ ; ALL-LABEL: b16:
+ ; ALL: lw $[[A_ADDR:[0-9]+]], %got(a)($[[GOT_ADDR:[0-9]+]])
+ ; ALL: lhu $[[A_VAL:[0-9]+]], 0($[[A_ADDR]])
+ ; 32R1: sll $[[TMP1:[0-9]+]], $[[A_VAL]], 8
+ ; 32R1: srl $[[TMP2:[0-9]+]], $[[A_VAL]], 8
+ ; 32R1: or $[[TMP3:[0-9]+]], $[[TMP1]], $[[TMP2]]
+ ; 32R1: andi $[[TMP4:[0-9]+]], $[[TMP3]], 65535
+ ; 32R2: wsbh $[[RESULT:[0-9]+]], $[[A_VAL]]
+ %1 = load i16, i16* @a, align 2
+ %2 = call i16 @llvm.bswap.i16(i16 %1)
+ store i16 %2, i16* @a1, align 2
+ ret void
+define void @b32() {
+ ; ALL-LABEL: b32:
+ ; ALL: lw $[[B_ADDR:[0-9]+]], %got(b)($[[GOT_ADDR:[0-9]+]])
+ ; ALL: lw $[[B_VAL:[0-9]+]], 0($[[B_ADDR]])
+ ; 32R1: srl $[[TMP1:[0-9]+]], $[[B_VAL]], 8
+ ; 32R1: srl $[[TMP2:[0-9]+]], $[[B_VAL]], 24
+ ; 32R1: andi $[[TMP3:[0-9]+]], $[[TMP1]], 65280
+ ; 32R1: or $[[TMP4:[0-9]+]], $[[TMP2]], $[[TMP3]]
+ ; 32R1: andi $[[TMP5:[0-9]+]], $[[B_VAL]], 65280
+ ; 32R1: sll $[[TMP6:[0-9]+]], $[[TMP5]], 8
+ ; 32R1: sll $[[TMP7:[0-9]+]], $[[B_VAL]], 24
+ ; 32R1: or $[[TMP8:[0-9]+]], $[[TMP4]], $[[TMP6]]
+ ; 32R1: or $[[RESULT:[0-9]+]], $[[TMP7]], $[[TMP8]]
+ ; 32R2: wsbh $[[TMP:[0-9]+]], $[[B_VAL]]
+ ; 32R2: rotr $[[RESULT:[0-9]+]], $[[TMP]], 16
+ %1 = load i32, i32* @b, align 4
+ %2 = call i32 @llvm.bswap.i32(i32 %1)
+ store i32 %2, i32* @b1, align 4
+ ret void
diff --git a/test/CodeGen/Mips/Fast-ISel/div1.ll b/test/CodeGen/Mips/Fast-ISel/div1.ll
new file mode 100644
index 0000000..89e7f21
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/div1.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 -relocation-model=pic \
+; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 -relocation-model=pic \
+; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s
+@sj = global i32 200000, align 4
+@sk = global i32 -47, align 4
+@uj = global i32 200000, align 4
+@uk = global i32 43, align 4
+@si = common global i32 0, align 4
+@ui = common global i32 0, align 4
+define void @divs() {
+ ; CHECK-LABEL: divs:
+ ; CHECK: lui $[[GOT1:[0-9]+]], %hi(_gp_disp)
+ ; CHECK: addiu $[[GOT2:[0-9]+]], $[[GOT1]], %lo(_gp_disp)
+ ; CHECK: addu $[[GOT:[0-9]+]], $[[GOT2:[0-9]+]], $25
+ ; CHECK-DAG: lw $[[I_ADDR:[0-9]+]], %got(si)($[[GOT]])
+ ; CHECK-DAG: lw $[[K_ADDR:[0-9]+]], %got(sk)($[[GOT]])
+ ; CHECK-DAG: lw $[[J_ADDR:[0-9]+]], %got(sj)($[[GOT]])
+ ; CHECK-DAG: lw $[[J:[0-9]+]], 0($[[J_ADDR]])
+ ; CHECK-DAG: lw $[[K:[0-9]+]], 0($[[K_ADDR]])
+ ; CHECK-DAG: div $zero, $[[J]], $[[K]]
+ ; CHECK-DAG: teq $[[K]], $zero, 7
+ ; CHECK-DAG: mflo $[[RESULT:[0-9]+]]
+ ; CHECK: sw $[[RESULT]], 0($[[I_ADDR]])
+ %1 = load i32, i32* @sj, align 4
+ %2 = load i32, i32* @sk, align 4
+ %div = sdiv i32 %1, %2
+ store i32 %div, i32* @si, align 4
+ ret void
+define void @divu() {
+ ; CHECK-LABEL: divu:
+ ; CHECK: lui $[[GOT1:[0-9]+]], %hi(_gp_disp)
+ ; CHECK: addiu $[[GOT2:[0-9]+]], $[[GOT1]], %lo(_gp_disp)
+ ; CHECK: addu $[[GOT:[0-9]+]], $[[GOT2:[0-9]+]], $25
+ ; CHECK-DAG: lw $[[I_ADDR:[0-9]+]], %got(ui)($[[GOT]])
+ ; CHECK-DAG: lw $[[K_ADDR:[0-9]+]], %got(uk)($[[GOT]])
+ ; CHECK-DAG: lw $[[J_ADDR:[0-9]+]], %got(uj)($[[GOT]])
+ ; CHECK-DAG: lw $[[J:[0-9]+]], 0($[[J_ADDR]])
+ ; CHECK-DAG: lw $[[K:[0-9]+]], 0($[[K_ADDR]])
+ ; CHECK-DAG: divu $zero, $[[J]], $[[K]]
+ ; CHECK-DAG: teq $[[K]], $zero, 7
+ ; CHECK-DAG: mflo $[[RESULT:[0-9]+]]
+ ; CHECK: sw $[[RESULT]], 0($[[I_ADDR]])
+ %1 = load i32, i32* @uj, align 4
+ %2 = load i32, i32* @uk, align 4
+ %div = udiv i32 %1, %2
+ store i32 %div, i32* @ui, align 4
+ ret void
diff --git a/test/CodeGen/Mips/Fast-ISel/memtest1.ll b/test/CodeGen/Mips/Fast-ISel/memtest1.ll
new file mode 100644
index 0000000..a3fc4a3
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/memtest1.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 -relocation-model=pic \
+; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=32R1
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 -relocation-model=pic \
+; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=32R2
+@str = private unnamed_addr constant [12 x i8] c"hello there\00", align 1
+@src = global i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str, i32 0, i32 0), align 4
+@i = global i32 12, align 4
+@dest = common global [50 x i8] zeroinitializer, align 1
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
+define void @cpy(i8* %src, i32 %i) {
+ ; ALL-LABEL: cpy:
+ ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
+ ; ALL-DAG: sw $4, 24($sp)
+ ; ALL-DAG: move $4, $[[T0]]
+ ; ALL-DAG: sw $5, 20($sp)
+ ; ALL-DAG: lw $[[T1:[0-9]+]], 24($sp)
+ ; ALL-DAG: move $5, $[[T1]]
+ ; ALL-DAG: lw $6, 20($sp)
+ ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memcpy)(${{[0-9]+}})
+ ; ALL: jalr $[[T2]]
+ ; ALL-NEXT: nop
+ ; ALL-NOT: {{.*}}$2{{.*}}
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0),
+ i8* %src, i32 %i, i32 1, i1 false)
+ ret void
+define void @mov(i8* %src, i32 %i) {
+ ; ALL-LABEL: mov:
+ ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
+ ; ALL-DAG: sw $4, 24($sp)
+ ; ALL-DAG: move $4, $[[T0]]
+ ; ALL-DAG: sw $5, 20($sp)
+ ; ALL-DAG: lw $[[T1:[0-9]+]], 24($sp)
+ ; ALL-DAG: move $5, $[[T1]]
+ ; ALL-DAG: lw $6, 20($sp)
+ ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memmove)(${{[0-9]+}})
+ ; ALL: jalr $[[T2]]
+ ; ALL-NEXT: nop
+ ; ALL-NOT: {{.*}}$2{{.*}}
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0),
+ i8* %src, i32 %i, i32 1, i1 false)
+ ret void
+define void @clear(i32 %i) {
+ ; ALL-LABEL: clear:
+ ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
+ ; ALL-DAG: sw $4, 16($sp)
+ ; ALL-DAG: move $4, $[[T0]]
+ ; ALL-DAG: addiu $[[T1:[0-9]+]], $zero, 42
+ ; 32R1-DAG: sll $[[T2:[0-9]+]], $[[T1]], 24
+ ; 32R1-DAG: sra $5, $[[T2]], 24
+ ; 32R2-DAG: seb $5, $[[T1]]
+ ; ALL-DAG: lw $6, 16($sp)
+ ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memset)(${{[0-9]+}})
+ ; ALL: jalr $[[T2]]
+ ; ALL-NEXT: nop
+ ; ALL-NOT: {{.*}}$2{{.*}}
+ call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0),
+ i8 42, i32 %i, i32 1, i1 false)
+ ret void
diff --git a/test/CodeGen/Mips/Fast-ISel/mul1.ll b/test/CodeGen/Mips/Fast-ISel/mul1.ll
new file mode 100644
index 0000000..0ee044b
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/mul1.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 \
+; RUN: -fast-isel -mips-fast-isel -relocation-model=pic
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 \
+; RUN: -fast-isel -mips-fast-isel -relocation-model=pic
+; The test is just to make sure it is able to allocate
+; registers for this example. There was an issue with allocating AC0
+; after a mul instruction.
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)
+define i32 @foo(i32 %a, i32 %b) {
+ %0 = mul i32 %a, %b
+ %1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %0, i32 %b)
+ %2 = extractvalue { i32, i1 } %1, 0
+ ret i32 %2
diff --git a/test/CodeGen/Mips/Fast-ISel/rem1.ll b/test/CodeGen/Mips/Fast-ISel/rem1.ll
new file mode 100644
index 0000000..9b5e440
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/rem1.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 -relocation-model=pic \
+; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 -relocation-model=pic \
+; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s
+@sj = global i32 200, align 4
+@sk = global i32 -47, align 4
+@uj = global i32 200, align 4
+@uk = global i32 43, align 4
+@si = common global i32 0, align 4
+@ui = common global i32 0, align 4
+define void @rems() {
+ ; CHECK-LABEL: rems:
+ ; CHECK: lui $[[GOT1:[0-9]+]], %hi(_gp_disp)
+ ; CHECK: addiu $[[GOT2:[0-9]+]], $[[GOT1]], %lo(_gp_disp)
+ ; CHECK: addu $[[GOT:[0-9]+]], $[[GOT2:[0-9]+]], $25
+ ; CHECK-DAG: lw $[[I_ADDR:[0-9]+]], %got(si)($[[GOT]])
+ ; CHECK-DAG: lw $[[K_ADDR:[0-9]+]], %got(sk)($[[GOT]])
+ ; CHECK-DAG: lw $[[J_ADDR:[0-9]+]], %got(sj)($[[GOT]])
+ ; CHECK-DAG: lw $[[J:[0-9]+]], 0($[[J_ADDR]])
+ ; CHECK-DAG: lw $[[K:[0-9]+]], 0($[[K_ADDR]])
+ ; CHECK-DAG: div $zero, $[[J]], $[[K]]
+ ; CHECK-DAG: teq $[[K]], $zero, 7
+ ; CHECK-DAG: mfhi $[[RESULT:[0-9]+]]
+ ; CHECK: sw $[[RESULT]], 0($[[I_ADDR]])
+ %1 = load i32, i32* @sj, align 4
+ %2 = load i32, i32* @sk, align 4
+ %rem = srem i32 %1, %2
+ store i32 %rem, i32* @si, align 4
+ ret void
+; Function Attrs: noinline nounwind
+define void @remu() {
+ ; CHECK-LABEL: remu:
+ ; CHECK: lui $[[GOT1:[0-9]+]], %hi(_gp_disp)
+ ; CHECK: addiu $[[GOT2:[0-9]+]], $[[GOT1]], %lo(_gp_disp)
+ ; CHECK: addu $[[GOT:[0-9]+]], $[[GOT2:[0-9]+]], $25
+ ; CHECK-DAG: lw $[[I_ADDR:[0-9]+]], %got(ui)($[[GOT]])
+ ; CHECK-DAG: lw $[[K_ADDR:[0-9]+]], %got(uk)($[[GOT]])
+ ; CHECK-DAG: lw $[[J_ADDR:[0-9]+]], %got(uj)($[[GOT]])
+ ; CHECK-DAG: lw $[[J:[0-9]+]], 0($[[J_ADDR]])
+ ; CHECK-DAG: lw $[[K:[0-9]+]], 0($[[K_ADDR]])
+ ; CHECK-DAG: divu $zero, $[[J]], $[[K]]
+ ; CHECK-DAG: teq $[[K]], $zero, 7
+ ; CHECK-DAG: mfhi $[[RESULT:[0-9]+]]
+ ; CHECK: sw $[[RESULT]], 0($[[I_ADDR]])
+ %1 = load i32, i32* @uj, align 4
+ %2 = load i32, i32* @uk, align 4
+ %rem = urem i32 %1, %2
+ store i32 %rem, i32* @ui, align 4
+ ret void
diff --git a/test/CodeGen/Mips/Fast-ISel/sel1.ll b/test/CodeGen/Mips/Fast-ISel/sel1.ll
new file mode 100644
index 0000000..47b6a89
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/sel1.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O2 -relocation-model=pic \
+; RUN: -fast-isel -mips-fast-isel -fast-isel-abort=1 | FileCheck %s
+define i1 @sel_i1(i1 %j, i1 %k, i1 %l) {
+ ; CHECK-LABEL: sel_i1:
+ ; FIXME: The following instruction is redundant.
+ ; CHECK: xor $[[T0:[0-9]+]], $4, $zero
+ ; CHECK-NEXT: sltu $[[T1:[0-9]+]], $zero, $[[T0]]
+ ; CHECK-NEXT: movn $6, $5, $[[T1]]
+ ; CHECK: move $2, $6
+ %cond = icmp ne i1 %j, 0
+ %res = select i1 %cond, i1 %k, i1 %l
+ ret i1 %res
+define i8 @sel_i8(i8 %j, i8 %k, i8 %l) {
+ ; CHECK-LABEL: sel_i8:
+ ; CHECK-DAG: seb $[[T0:[0-9]+]], $4
+ ; FIXME: The following 2 instructions are redundant.
+ ; CHECK-DAG: seb $[[T1:[0-9]+]], $zero
+ ; CHECK: xor $[[T2:[0-9]+]], $[[T0]], $[[T1]]
+ ; CHECK-NEXT: sltu $[[T3:[0-9]+]], $zero, $[[T2]]
+ ; CHECK-NEXT: movn $6, $5, $[[T3]]
+ ; CHECK: move $2, $6
+ %cond = icmp ne i8 %j, 0
+ %res = select i1 %cond, i8 %k, i8 %l
+ ret i8 %res
+define i16 @sel_i16(i16 %j, i16 %k, i16 %l) {
+ ; CHECK-LABEL: sel_i16:
+ ; CHECK-DAG: seh $[[T0:[0-9]+]], $4
+ ; FIXME: The following 2 instructions are redundant.
+ ; CHECK-DAG: seh $[[T1:[0-9]+]], $zero
+ ; CHECK: xor $[[T2:[0-9]+]], $[[T0]], $[[T1]]
+ ; CHECK-NEXT: sltu $[[T3:[0-9]+]], $zero, $[[T2]]
+ ; CHECK-NEXT: movn $6, $5, $[[T3]]
+ ; CHECK: move $2, $6
+ %cond = icmp ne i16 %j, 0
+ %res = select i1 %cond, i16 %k, i16 %l
+ ret i16 %res
+define i32 @sel_i32(i32 %j, i32 %k, i32 %l) {
+ ; CHECK-LABEL: sel_i32:
+ ; FIXME: The following instruction is redundant.
+ ; CHECK: xor $[[T0:[0-9]+]], $4, $zero
+ ; CHECK-NEXT: sltu $[[T1:[0-9]+]], $zero, $[[T0]]
+ ; CHECK-NEXT: movn $6, $5, $[[T1]]
+ ; CHECK: move $2, $6
+ %cond = icmp ne i32 %j, 0
+ %res = select i1 %cond, i32 %k, i32 %l
+ ret i32 %res
+define float @sel_float(i32 %j, float %k, float %l) {
+ ; CHECK-LABEL: sel_float:
+ ; CHECK-DAG: mtc1 $6, $f0
+ ; CHECK-DAG: mtc1 $5, $f1
+ ; CHECK-DAG: xor $[[T0:[0-9]+]], $4, $zero
+ ; CHECK: sltu $[[T1:[0-9]+]], $zero, $[[T0]]
+ ; CHECK: movn.s $f0, $f1, $[[T1]]
+ %cond = icmp ne i32 %j, 0
+ %res = select i1 %cond, float %k, float %l
+ ret float %res
+define double @sel_double(i32 %j, double %k, double %l) {
+ ; CHECK-LABEL: sel_double:
+ ; CHECK-DAG: mtc1 $6, $f2
+ ; CHECK-DAG: mthc1 $7, $f2
+ ; CHECK-DAG: ldc1 $f0, 16($sp)
+ ; CHECK-DAG: xor $[[T0:[0-9]+]], $4, $zero
+ ; CHECK: sltu $[[T1:[0-9]+]], $zero, $[[T0]]
+ ; CHECK: movn.d $f0, $f2, $[[T1]]
+ %cond = icmp ne i32 %j, 0
+ %res = select i1 %cond, double %k, double %l
+ ret double %res
diff --git a/test/CodeGen/Mips/dynamic-stack-realignment.ll b/test/CodeGen/Mips/dynamic-stack-realignment.ll
new file mode 100644
index 0000000..777930a
--- /dev/null
+++ b/test/CodeGen/Mips/dynamic-stack-realignment.ll
@@ -0,0 +1,299 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN: --check-prefix=ALL --check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN: --check-prefix=ALL --check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN: --check-prefix=ALL --check-prefix=GP32
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN: --check-prefix=ALL --check-prefix=GP64 -check-prefix=N64
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN: --check-prefix=ALL --check-prefix=GP64 -check-prefix=N64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN: --check-prefix=ALL --check-prefix=GP64 -check-prefix=N64
+; RUN: llc < %s -march=mips64 -mcpu=mips3 -target-abi n32 | FileCheck %s \
+; RUN: --check-prefix=ALL --check-prefix=GP64 -check-prefix=N32
+; RUN: llc < %s -march=mips64 -mcpu=mips64 -target-abi n32 | FileCheck %s \
+; RUN: --check-prefix=ALL --check-prefix=GP64 -check-prefix=N32
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 -target-abi n32 | FileCheck %s \
+; RUN: --check-prefix=ALL --check-prefix=GP64 -check-prefix=N32
+; Check dynamic stack realignment in functions without variable-sized objects.
+declare void @helper_01(i32, i32, i32, i32, i32*)
+; O32 ABI
+define void @func_01() {
+; GP32-LABEL: func_01:
+ ; prologue
+ ; FIXME: We are currently over-allocating stack space. This particular case
+ ; needs a frame of between 16 and 512 bytes but currently
+ ; allocates between 1024 and 1536 bytes
+ ; GP32: addiu $sp, $sp, -1024
+ ; GP32: sw $ra, 1020($sp)
+ ; GP32: sw $fp, 1016($sp)
+ ;
+ ; GP32: move $fp, $sp
+ ; GP32: addiu $[[T0:[0-9]+|ra|gp]], $zero, -512
+ ; GP32-NEXT: and $sp, $sp, $[[T0]]
+ ; body
+ ; GP32: addiu $[[T1:[0-9]+]], $sp, 512
+ ; GP32: sw $[[T1]], 16($sp)
+ ; epilogue
+ ; GP32: move $sp, $fp
+ ; GP32: lw $fp, 1016($sp)
+ ; GP32: lw $ra, 1020($sp)
+ ; GP32: addiu $sp, $sp, 1024
+ %a = alloca i32, align 512
+ call void @helper_01(i32 0, i32 0, i32 0, i32 0, i32* %a)
+ ret void
+declare void @helper_02(i32, i32, i32, i32,
+ i32, i32, i32, i32, i32*)
+; N32/N64 ABIs
+define void @func_02() {
+; GP64-LABEL: func_02:
+ ; prologue
+ ; FIXME: We are currently over-allocating stack space. This particular case
+ ; needs a frame of between 16 and 512 bytes but currently
+ ; allocates between 1024 and 1536 bytes
+ ; N32: addiu $sp, $sp, -1024
+ ; N64: daddiu $sp, $sp, -1024
+ ; GP64: sd $ra, 1016($sp)
+ ; GP64: sd $fp, 1008($sp)
+ ; N32: sd $gp, 1000($sp)
+ ;
+ ; GP64: move $fp, $sp
+ ; N32: addiu $[[T0:[0-9]+|ra]], $zero, -512
+ ; N64: daddiu $[[T0:[0-9]+|ra]], $zero, -512
+ ; GP64-NEXT: and $sp, $sp, $[[T0]]
+ ; body
+ ; N32: addiu $[[T1:[0-9]+]], $sp, 512
+ ; N64: daddiu $[[T1:[0-9]+]], $sp, 512
+ ; GP64: sd $[[T1]], 0($sp)
+ ; epilogue
+ ; GP64: move $sp, $fp
+ ; N32: ld $gp, 1000($sp)
+ ; GP64: ld $fp, 1008($sp)
+ ; GP64: ld $ra, 1016($sp)
+ ; N32: addiu $sp, $sp, 1024
+ ; N64: daddiu $sp, $sp, 1024
+ %a = alloca i32, align 512
+ call void @helper_02(i32 0, i32 0, i32 0, i32 0,
+ i32 0, i32 0, i32 0, i32 0, i32* %a)
+ ret void
+; Verify that we use $fp for referencing incoming arguments.
+declare void @helper_03(i32, i32, i32, i32, i32*, i32*)
+; O32 ABI
+define void @func_03(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32* %b) {
+; GP32-LABEL: func_03:
+ ; body
+ ; FIXME: We are currently over-allocating stack space.
+ ; GP32-DAG: addiu $[[T0:[0-9]+]], $sp, 512
+ ; GP32-DAG: sw $[[T0]], 16($sp)
+ ; GP32-DAG: lw $[[T1:[0-9]+]], 1040($fp)
+ ; GP32-DAG: sw $[[T1]], 20($sp)
+ %a = alloca i32, align 512
+ call void @helper_03(i32 0, i32 0, i32 0, i32 0, i32* %a, i32* %b)
+ ret void
+declare void @helper_04(i32, i32, i32, i32,
+ i32, i32, i32, i32, i32*, i32*)
+; N32/N64 ABIs
+define void @func_04(i32 %p0, i32 %p1, i32 %p2, i32 %p3,
+ i32 %p4, i32 %p5, i32 %p6, i32 %p7,
+ i32* %b) {
+; GP64-LABEL: func_04:
+ ; body
+ ; FIXME: We are currently over-allocating stack space.
+ ; N32-DAG: addiu $[[T0:[0-9]+]], $sp, 512
+ ; N64-DAG: daddiu $[[T0:[0-9]+]], $sp, 512
+ ; GP64-DAG: sd $[[T0]], 0($sp)
+ ; GP64-DAG: ld $[[T1:[0-9]+]], 1024($fp)
+ ; GP64-DAG: sd $[[T1]], 8($sp)
+ %a = alloca i32, align 512
+ call void @helper_04(i32 0, i32 0, i32 0, i32 0,
+ i32 0, i32 0, i32 0, i32 0, i32* %a, i32* %b)
+ ret void
+; Check dynamic stack realignment in functions with variable-sized objects.
+; O32 ABI
+define void @func_05(i32 %sz) {
+; GP32-LABEL: func_05:
+ ; prologue
+ ; FIXME: We are currently over-allocating stack space.
+ ; GP32: addiu $sp, $sp, -1024
+ ; GP32: sw $fp, 1020($sp)
+ ; GP32: sw $23, 1016($sp)
+ ;
+ ; GP32: move $fp, $sp
+ ; GP32: addiu $[[T0:[0-9]+|gp]], $zero, -512
+ ; GP32-NEXT: and $sp, $sp, $[[T0]]
+ ; GP32-NEXT: move $23, $sp
+ ; body
+ ; GP32: addiu $[[T1:[0-9]+]], $zero, 222
+ ; GP32: sw $[[T1]], 508($23)
+ ; epilogue
+ ; GP32: move $sp, $fp
+ ; GP32: lw $23, 1016($sp)
+ ; GP32: lw $fp, 1020($sp)
+ ; GP32: addiu $sp, $sp, 1024
+ %a0 = alloca i32, i32 %sz, align 512
+ %a1 = alloca i32, align 4
+ store volatile i32 111, i32* %a0, align 512
+ store volatile i32 222, i32* %a1, align 4
+ ret void
+; N32/N64 ABIs
+define void @func_06(i32 %sz) {
+; GP64-LABEL: func_06:
+ ; prologue
+ ; FIXME: We are currently over-allocating stack space.
+ ; N32: addiu $sp, $sp, -1024
+ ; N64: daddiu $sp, $sp, -1024
+ ; GP64: sd $fp, 1016($sp)
+ ; GP64: sd $23, 1008($sp)
+ ;
+ ; GP64: move $fp, $sp
+ ; GP64: addiu $[[T0:[0-9]+|gp]], $zero, -512
+ ; GP64-NEXT: and $sp, $sp, $[[T0]]
+ ; GP64-NEXT: move $23, $sp
+ ; body
+ ; GP64: addiu $[[T1:[0-9]+]], $zero, 222
+ ; GP64: sw $[[T1]], 508($23)
+ ; epilogue
+ ; GP64: move $sp, $fp
+ ; GP64: ld $23, 1008($sp)
+ ; GP64: ld $fp, 1016($sp)
+ ; N32: addiu $sp, $sp, 1024
+ ; N64: daddiu $sp, $sp, 1024
+ %a0 = alloca i32, i32 %sz, align 512
+ %a1 = alloca i32, align 4
+ store volatile i32 111, i32* %a0, align 512
+ store volatile i32 222, i32* %a1, align 4
+ ret void
+; Verify that we use $fp for referencing incoming arguments and $sp for
+; building outbound arguments for nested function calls.
+; O32 ABI
+define void @func_07(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %sz) {
+; GP32-LABEL: func_07:
+ ; body
+ ; FIXME: We are currently over-allocating stack space.
+ ; GP32-DAG: lw $[[T0:[0-9]+]], 1040($fp)
+ ;
+ ; GP32-DAG: addiu $[[T1:[0-9]+]], $zero, 222
+ ; GP32-DAG: sw $[[T1]], 508($23)
+ ;
+ ; GP32-DAG: sw $[[T2:[0-9]+]], 16($sp)
+ %a0 = alloca i32, i32 %sz, align 512
+ %a1 = alloca i32, align 4
+ store volatile i32 111, i32* %a0, align 512
+ store volatile i32 222, i32* %a1, align 4
+ call void @helper_01(i32 0, i32 0, i32 0, i32 0, i32* %a1)
+ ret void
+; N32/N64 ABIs
+define void @func_08(i32 %p0, i32 %p1, i32 %p2, i32 %p3,
+ i32 %p4, i32 %p5, i32 %p6, i32 %p7,
+ i32 %sz) {
+; GP64-LABEL: func_08:
+ ; body
+ ; FIXME: We are currently over-allocating stack space.
+ ; N32-DAG: lw $[[T0:[0-9]+]], 1028($fp)
+ ; N64-DAG: lwu $[[T0:[0-9]+]], 1028($fp)
+ ;
+ ; GP64-DAG: addiu $[[T1:[0-9]+]], $zero, 222
+ ; GP64-DAG: sw $[[T1]], 508($23)
+ ;
+ ; GP64-DAG: sd $[[T2:[0-9]+]], 0($sp)
+ %a0 = alloca i32, i32 %sz, align 512
+ %a1 = alloca i32, align 4
+ store volatile i32 111, i32* %a0, align 512
+ store volatile i32 222, i32* %a1, align 4
+ call void @helper_02(i32 0, i32 0, i32 0, i32 0,
+ i32 0, i32 0, i32 0, i32 0, i32* %a1)
+ ret void
+; Check that we do not perform dynamic stack realignment in the presence of
+; the "no-realign-stack" function attribute.
+define void @func_09() "no-realign-stack" {
+; ALL-LABEL: func_09:
+ ; ALL-NOT: and $sp, $sp, $[[T0:[0-9]+|ra|gp]]
+ %a = alloca i32, align 512
+ call void @helper_01(i32 0, i32 0, i32 0, i32 0, i32* %a)
+ ret void
+define void @func_10(i32 %sz) "no-realign-stack" {
+; ALL-LABEL: func_10:
+ ; ALL-NOT: and $sp, $sp, $[[T0:[0-9]+|ra|gp]]
+ %a0 = alloca i32, i32 %sz, align 512
+ %a1 = alloca i32, align 4
+ store volatile i32 111, i32* %a0, align 512
+ store volatile i32 222, i32* %a1, align 4
+ ret void
diff --git a/test/CodeGen/Mips/ehframe-indirect.ll b/test/CodeGen/Mips/ehframe-indirect.ll
index f124881..dc06ef7 100644
--- a/test/CodeGen/Mips/ehframe-indirect.ll
+++ b/test/CodeGen/Mips/ehframe-indirect.ll
@@ -1,9 +1,11 @@
-; RUN: llc -mtriple=mipsel-linux-gnu < %s | FileCheck -check-prefix=ALL -check-prefix=O32 %s
-; RUN: llc -mtriple=mipsel-linux-android < %s | FileCheck -check-prefix=ALL -check-prefix=O32 %s
-; RUN: llc -mtriple=mips64el-linux-gnu -target-abi=n32 < %s | FileCheck -check-prefix=ALL -check-prefix=N32 %s
-; RUN: llc -mtriple=mips64el-linux-android -target-abi=n32 < %s | FileCheck -check-prefix=ALL -check-prefix=N32 %s
-; RUN: llc -mtriple=mips64el-linux-gnu < %s | FileCheck -check-prefix=ALL -check-prefix=N64 %s
-; RUN: llc -mtriple=mips64el-linux-android < %s | FileCheck -check-prefix=ALL -check-prefix=N64 %s
+; RUN: llc -mtriple=mipsel-linux-gnu < %s -asm-verbose | FileCheck -check-prefix=ALL -check-prefix=O32 %s
+; RUN: llc -mtriple=mipsel-linux-android < %s -asm-verbose | FileCheck -check-prefix=ALL -check-prefix=O32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -target-abi=n32 < %s -asm-verbose | FileCheck -check-prefix=ALL -check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-android -target-abi=n32 < %s -asm-verbose | FileCheck -check-prefix=ALL -check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu < %s -asm-verbose | FileCheck -check-prefix=ALL -check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-android < %s -asm-verbose | FileCheck -check-prefix=ALL -check-prefix=N64 %s
+@_ZTISt9exception = external constant i8*
define i32 @main() {
; ALL: .cfi_startproc
@@ -16,7 +18,9 @@ entry:
%0 = landingpad { i8*, i32 } personality i8*
- bitcast (i32 (...)* @__gxx_personality_v0 to i8*) catch i8* null
+ bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ catch i8* bitcast (i8** @_ZTISt9exception to i8*)
ret i32 0
@@ -28,6 +32,14 @@ declare i32 @__gxx_personality_v0(...)
declare void @foo()
+; ALL: GCC_except_table{{[0-9]+}}:
+; ALL: .byte 155 # @TType Encoding = indirect pcrel sdata4
+; ALL: $[[PC_LABEL:tmp[0-9]+]]:
+; ALL: .4byte ($_ZTISt9exception.DW.stub)-($[[PC_LABEL]])
+; ALL: $_ZTISt9exception.DW.stub:
+; O32: .4byte _ZTISt9exception
+; N32: .4byte _ZTISt9exception
+; N64: .8byte _ZTISt9exception
; ALL: .hidden DW.ref.__gxx_personality_v0
; ALL: .weak DW.ref.__gxx_personality_v0
; ALL: .section .data.DW.ref.__gxx_personality_v0,"aGw",@progbits,DW.ref.__gxx_personality_v0,comdat
diff --git a/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll b/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll
index 3dc1cde..779620e 100644
--- a/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll
+++ b/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll
@@ -1,8 +1,8 @@
; Check that register scavenging spill slot is close to $fp.
; RUN: llc -march=mipsel -O0 < %s | FileCheck %s
-; CHECK: sw ${{.*}}, 4($fp)
-; CHECK: lw ${{.*}}, 4($fp)
+; CHECK: sw ${{.*}}, 4($sp)
+; CHECK: lw ${{.*}}, 4($sp)
define i32 @main(i32 signext %argc, i8** %argv) "no-frame-pointer-elim"="true" {
diff --git a/test/CodeGen/NVPTX/access-non-generic.ll b/test/CodeGen/NVPTX/access-non-generic.ll
index e709302..5deefe8 100644
--- a/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/test/CodeGen/NVPTX/access-non-generic.ll
@@ -85,6 +85,22 @@ define i32 @ld_int_from_float() {
ret i32 %1
+define i32 @ld_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) {
+; IR-LABEL: @ld_int_from_global_float(
+; PTX-LABEL: ld_int_from_global_float(
+ %1 = addrspacecast float addrspace(1)* %input to float*
+ %2 = getelementptr float, float* %1, i32 %i
+; IR-NEXT: getelementptr float, float addrspace(1)* %input, i32 %i
+ %3 = getelementptr float, float* %2, i32 %j
+; IR-NEXT: getelementptr float, float addrspace(1)* {{%[^,]+}}, i32 %j
+ %4 = bitcast float* %3 to i32*
+; IR-NEXT: bitcast float addrspace(1)* {{%[^ ]+}} to i32 addrspace(1)*
+ %5 = load i32, i32* %4
+; IR-NEXT: load i32, i32 addrspace(1)* {{%.+}}
+ ret i32 %5
declare void @llvm.cuda.syncthreads() #3
attributes #3 = { noduplicate nounwind }
diff --git a/test/CodeGen/NVPTX/bug21465.ll b/test/CodeGen/NVPTX/bug21465.ll
index 76af386..2eae41f 100644
--- a/test/CodeGen/NVPTX/bug21465.ll
+++ b/test/CodeGen/NVPTX/bug21465.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -nvptx-lower-struct-args -S | FileCheck %s
+; RUN: opt < %s -nvptx-lower-kernel-args -S | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx64-unknown-unknown"
@@ -8,12 +9,15 @@ target triple = "nvptx64-unknown-unknown"
; Function Attrs: nounwind
define void @_Z11TakesStruct1SPi(%struct.S* byval nocapture readonly %input, i32* nocapture %output) #0 {
-; CHECK-LABEL @_Z22TakesStruct1SPi
-; CHECK: bitcast %struct.S* %input to i8*
-; CHECK: call i8 addrspace(101)*
+; CHECK-LABEL: @_Z11TakesStruct1SPi
+; PTX-LABEL: .visible .entry _Z11TakesStruct1SPi(
+; CHECK: addrspacecast %struct.S* %input to %struct.S addrspace(101)*
%b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
%0 = load i32, i32* %b, align 4
+; PTX-NOT: ld.param.u32 {{%r[0-9]+}}, [{{%rd[0-9]+}}]
+; PTX: ld.param.u32 [[value:%r[0-9]+]], [{{%rd[0-9]+}}+4]
store i32 %0, i32* %output, align 4
+; PTX-NEXT: [{{%rd[0-9]+}}], [[value]]
ret void
diff --git a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index 58b1911..c70670d 100644
--- a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -24,7 +24,10 @@ entry:
; CHECK: cvta.local.u64 %SP, %rd[[BUF_REG]]
; CHECK: ld.param.u64 %rd[[A_REG:[0-9]+]], [kernel_func_param_0]
-; CHECK: ld.f32 %f[[A0_REG:[0-9]+]], [%rd[[A_REG]]]
+; CHECK: %rd[[A1_REG:[0-9]+]], %rd[[A_REG]]
+; FIXME: casting A1_REG to A2_REG is unnecessary; A2_REG is essentially A_REG
+; CHECK: %rd[[A2_REG:[0-9]+]], %rd[[A1_REG]]
+; CHECK: %f[[A0_REG:[0-9]+]], [%rd[[A1_REG]]]
; CHECK: st.f32 [%SP+0], %f[[A0_REG]]
%0 = load float, float* %a, align 4
@@ -48,7 +51,7 @@ entry:
; CHECK: add.u64 %rd[[SP_REG:[0-9]+]], %SP, 0
; CHECK: .param .b64 param0;
-; CHECK-NEXT: st.param.b64 [param0+0], %rd[[A_REG]]
+; CHECK-NEXT: st.param.b64 [param0+0], %rd[[A2_REG]]
; CHECK-NEXT: .param .b64 param1;
; CHECK-NEXT: st.param.b64 [param1+0], %rd[[SP_REG]]
; CHECK-NEXT: call.uni
diff --git a/test/CodeGen/NVPTX/globals_init.ll b/test/CodeGen/NVPTX/globals_init.ll
new file mode 100644
index 0000000..5b45f41
--- /dev/null
+++ b/test/CodeGen/NVPTX/globals_init.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; Make sure the globals constant initializers are not prone to host endianness
+; issues.
+; CHECK-DAG: .b8 Gbli08[2] = {171, 205};
+@Gbli08 = global [2 x i8] [i8 171, i8 205]
+; CHECK-DAG: .b8 Gbli16[4] = {205, 171, 1, 239};
+@Gbli16 = global [2 x i16] [i16 43981, i16 61185]
+; CHECK-DAG: .b8 Gbli32[8] = {1, 239, 205, 171, 137, 103, 69, 35};
+@Gbli32 = global [2 x i32] [i32 2882400001, i32 591751049]
+; CHECK-DAG: .b8 Gbli64[16] = {137, 103, 69, 35, 1, 239, 205, 171, 239, 205, 171, 137, 103, 69, 35, 1};
+@Gbli64 = global [2 x i64] [i64 12379813738877118345, i64 81985529216486895]
+; CHECK-DAG: .b8 Gblf32[8] = {192, 225, 100, 75, 0, 96, 106, 69};
+@Gblf32 = global [2 x float] [float 1.5e+7, float 3.75e+3]
+; CHECK-DAG: .b8 Gblf64[16] = {116, 10, 181, 48, 134, 62, 230, 58, 106, 222, 138, 98, 204, 250, 200, 75};
+@Gblf64 = global [2 x double] [double 5.75e-25, double 12.25e+56]
diff --git a/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll b/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
new file mode 100644
index 0000000..53220bd
--- /dev/null
+++ b/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+; Verify that both %input and %output are converted to global pointers and then
+; addrspacecast'ed back to the original type.
+define void @kernel(float* %input, float* %output) {
+; CHECK-LABEL: .visible .entry kernel(
+ %1 = load float, float* %input, align 4
+ store float %1, float* %output, align 4
+ ret void
+!nvvm.annotations = !{!0}
+!0 = !{void (float*, float*)* @kernel, !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/pr13291-i1-store.ll b/test/CodeGen/NVPTX/pr13291-i1-store.ll
index d4f7c3b..934df30 100644
--- a/test/CodeGen/NVPTX/pr13291-i1-store.ll
+++ b/test/CodeGen/NVPTX/pr13291-i1-store.ll
@@ -3,19 +3,19 @@
define ptx_kernel void @t1(i1* %a) {
; PTX32: mov.u16 %rs{{[0-9]+}}, 0;
-; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}};
+; PTX32-NEXT: [%r{{[0-9]+}}], %rs{{[0-9]+}};
; PTX64: mov.u16 %rs{{[0-9]+}}, 0;
-; PTX64-NEXT: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}};
+; PTX64-NEXT: [%rd{{[0-9]+}}], %rs{{[0-9]+}};
store i1 false, i1* %a
ret void
define ptx_kernel void @t2(i1* %a, i8* %b) {
-; PTX32: ld.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
; PTX32: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
-; PTX64: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; PTX64: %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
; PTX64: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
diff --git a/test/CodeGen/NVPTX/surf-read-cuda.ll b/test/CodeGen/NVPTX/surf-read-cuda.ll
index ed02134..c17c71e 100644
--- a/test/CodeGen/NVPTX/surf-read-cuda.ll
+++ b/test/CodeGen/NVPTX/surf-read-cuda.ll
@@ -18,8 +18,8 @@ define void @foo(i64 %img, float* %red, i32 %idx) {
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
%ret = sitofp i32 %val to float
-; SM20: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
-; SM30: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
+; SM20: [%r{{[0-9]+}}], %f[[REDF]]
+; SM30: [%r{{[0-9]+}}], %f[[REDF]]
store float %ret, float* %red
ret void
@@ -37,8 +37,8 @@ define void @bar(float* %red, i32 %idx) {
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
%ret = sitofp i32 %val to float
-; SM20: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
-; SM30: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
+; SM20: [%r{{[0-9]+}}], %f[[REDF]]
+; SM30: [%r{{[0-9]+}}], %f[[REDF]]
store float %ret, float* %red
ret void
diff --git a/test/CodeGen/NVPTX/tex-read-cuda.ll b/test/CodeGen/NVPTX/tex-read-cuda.ll
index c5b5600..d5f7c16 100644
--- a/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ b/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -16,8 +16,8 @@ define void @foo(i64 %img, float* %red, i32 %idx) {
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
%val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %img, i32 %idx)
%ret = extractvalue { float, float, float, float } %val, 0
-; SM20: st.f32 [%r{{[0-9]+}}], %f[[RED]]
-; SM30: st.f32 [%r{{[0-9]+}}], %f[[RED]]
+; SM20: [%r{{[0-9]+}}], %f[[RED]]
+; SM30: [%r{{[0-9]+}}], %f[[RED]]
store float %ret, float* %red
ret void
@@ -34,8 +34,8 @@ define void @bar(float* %red, i32 %idx) {
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
%val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
%ret = extractvalue { float, float, float, float } %val, 0
-; SM20: st.f32 [%r{{[0-9]+}}], %f[[RED]]
-; SM30: st.f32 [%r{{[0-9]+}}], %f[[RED]]
+; SM20: [%r{{[0-9]+}}], %f[[RED]]
+; SM30: [%r{{[0-9]+}}], %f[[RED]]
store float %ret, float* %red
ret void
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll
index ab5251b..9cfef39 100644
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -1,9 +1,12 @@
; RUN: llc < %s -march=ppc32 -fp-contract=fast -mattr=-vsx | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mcpu=pwr8 | FileCheck -check-prefix=CHECK-P8 %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -fp-contract=fast -mcpu=pwr8 | FileCheck -check-prefix=CHECK-P8 %s
declare double @dummy1(double) #0
declare double @dummy2(double, double) #0
declare double @dummy3(double, double, double) #0
+declare float @dummy4(float, float) #0
define double @test_FMADD1(double %A, double %B, double %C) {
%D = fmul double %A, %B ; <double> [#uses=1]
@@ -126,3 +129,83 @@ define float @test_FNMSUBS(float %A, float %B, float %C) {
; CHECK-VSX: fnmsubs
+define float @test_XSMADDMSP(float %A, float %B, float %C) {
+ %D = fmul float %A, %B ; <float> [#uses=1]
+ %E = fadd float %C, %D ; <float> [#uses=1]
+ ret float %E
+; CHECK-P8: xsmaddmsp
+; CHECK-P8-NEXT: blr
+define float @test_XSMSUBMSP(float %A, float %B, float %C) {
+ %D = fmul float %A, %B ; <float> [#uses=1]
+ %E = fsub float %D, %C ; <float> [#uses=1]
+ ret float %E
+; CHECK-P8: xsmsubmsp
+; CHECK-P8-NEXT: blr
+define float @test_XSMADDASP(float %A, float %B, float %C, float %D) {
+ %E = fmul float %A, %B ; <float> [#uses=2]
+ %F = fadd float %E, %C ; <float> [#uses=1]
+ %G = fsub float %E, %D ; <float> [#uses=1]
+ %H = call float @dummy4(float %F, float %G) ; <float> [#uses=1]
+ ret float %H
+; CHECK-P8: xsmaddasp
+; CHECK-P8-NEXT: xsmsubmsp
+define float @test_XSMSUBASP(float %A, float %B, float %C, float %D) {
+ %E = fmul float %A, %B ; <float> [#uses=2]
+ %F = fsub float %E, %C ; <float> [#uses=1]
+ %G = fsub float %E, %D ; <float> [#uses=1]
+ %H = call float @dummy4(float %F, float %G) ; <float> [#uses=1]
+ ret float %H
+; CHECK-P8: xsmsubasp
+; CHECK-P8-NEXT: xsmsubmsp
+define float @test_XSNMADDMSP(float %A, float %B, float %C) {
+ %D = fmul float %A, %B ; <float> [#uses=1]
+ %E = fadd float %D, %C ; <float> [#uses=1]
+ %F = fsub float -0.000000e+00, %E ; <float> [#uses=1]
+ ret float %F
+; CHECK-P8: xsnmaddmsp
+; CHECK-P8-NEXT: blr
+define float @test_XSNMSUBMSP(float %A, float %B, float %C) {
+ %D = fmul float %A, %B ; <float> [#uses=1]
+ %E = fsub float %D, %C ; <float> [#uses=1]
+ %F = fsub float -0.000000e+00, %E ; <float> [#uses=1]
+ ret float %F
+; CHECK-P8: xsnmsubmsp
+; CHECK-P8-NEXT: blr
+define float @test_XSNMADDASP(float %A, float %B, float %C) {
+ %D = fmul float %A, %B ; <float> [#uses=1]
+ %E = fadd float %D, %C ; <float> [#uses=1]
+ %F = fsub float -0.000000e+00, %E ; <float> [#uses=1]
+ %H = call float @dummy4(float %E, float %F) ; <float> [#uses=1]
+ ret float %F
+; CHECK-P8: xsnmaddasp
+define float @test_XSNMSUBASP(float %A, float %B, float %C) {
+ %D = fmul float %A, %B ; <float> [#uses=1]
+ %E = fsub float %D, %C ; <float> [#uses=1]
+ %F = fsub float -0.000000e+00, %E ; <float> [#uses=1]
+ %H = call float @dummy4(float %E, float %F) ; <float> [#uses=1]
+ ret float %F
+; CHECK-P8: xsnmsubasp
diff --git a/test/CodeGen/PowerPC/vsx-fma-sp.ll b/test/CodeGen/PowerPC/vsx-fma-sp.ll
new file mode 100644
index 0000000..1c3e457
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-fma-sp.ll
@@ -0,0 +1,167 @@
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx -fast-isel -O0 | FileCheck -check-prefix=CHECK-FISL %s
+define void @test1sp(float %a, float %b, float %c, float %e, float* nocapture %d) #0 {
+ %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+ store float %0, float* %d, align 4
+ %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+ %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+ store float %1, float* %arrayidx1, align 4
+ ret void
+; CHECK-LABEL: @test1sp
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: xsmaddmsp 3, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 4
+; CHECK-DAG: stxsspx 3, 0, 7
+; CHECK-DAG: stxsspx 1, 7, [[C1]]
+; CHECK: blr
+; CHECK-FISL-LABEL: @test1sp
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 7
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 4
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 1, 7, [[C1]]
+; CHECK-FISL: blr
+define void @test2sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+ %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+ store float %0, float* %d, align 4
+ %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+ %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+ store float %1, float* %arrayidx1, align 4
+ %2 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+ %arrayidx2 = getelementptr inbounds float, float* %d, i64 2
+ store float %2, float* %arrayidx2, align 4
+ ret void
+; CHECK-LABEL: @test2sp
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: xsmaddmsp 3, 2, 1
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 5
+; CHECK-DAG: stxsspx 3, 0, 8
+; CHECK-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK: blr
+; CHECK-FISL-LABEL: @test2sp
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 8
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 4
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL: blr
+define void @test3sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+ %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+ store float %0, float* %d, align 4
+ %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+ %2 = tail call float @llvm.fma.f32(float %b, float %c, float %1)
+ %arrayidx1 = getelementptr inbounds float, float* %d, i64 3
+ store float %2, float* %arrayidx1, align 4
+ %3 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+ %arrayidx2 = getelementptr inbounds float, float* %d, i64 2
+ store float %3, float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds float, float* %d, i64 1
+ store float %1, float* %arrayidx3, align 4
+ ret void
+; CHECK-LABEL: @test3sp
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-DAG: li [[C1:[0-9]+]], 12
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: li [[C3:[0-9]+]], 4
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 5
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xsmaddasp [[F1]], 2, 3
+; CHECK-DAG: xsmaddmsp 3, 2, 4
+; CHECK-DAG: stxsspx [[F1]], 0, 8
+; CHECK-DAG: stxsspx 3, 8, [[C1]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-DAG: stxsspx 4, 8, [[C3]]
+; CHECK: blr
+; CHECK-FISL-LABEL: @test3sp
+; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 4
+; CHECK-FISL-DAG: fmr 4, [[F1]]
+; CHECK-FISL-DAG: xsmaddasp 4, 2, 3
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 12
+; CHECK-FISL-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL-DAG: li [[C3:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C3]]
+; CHECK-FISL: blr
+define void @test4sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+ %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+ store float %0, float* %d, align 4
+ %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+ %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+ store float %1, float* %arrayidx1, align 4
+ %2 = tail call float @llvm.fma.f32(float %b, float %c, float %1)
+ %arrayidx3 = getelementptr inbounds float, float* %d, i64 3
+ store float %2, float* %arrayidx3, align 4
+ %3 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+ %arrayidx4 = getelementptr inbounds float, float* %d, i64 2
+ store float %3, float* %arrayidx4, align 4
+ ret void
+; CHECK-LABEL: @test4sp
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xsmaddasp 1, 2, 5
+; CHECK-DAG: xsmaddasp [[F1]], 2, 3
+; CHECK-DAG: stxsspx [[F1]], 0, 8
+; CHECK-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-DAG: li [[C3:[0-9]+]], 12
+; CHECK-DAG: xsmaddasp 4, 2, 3
+; CHECK-DAG: stxsspx 4, 8, [[C3]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK: blr
+; CHECK-FISL-LABEL: @test4sp
+; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 8
+; CHECK-FISL-DAG: fmr [[F1]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 4
+; CHECK-FISL-DAG: li [[C3:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C3]]
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 12
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL: blr
+declare float @llvm.fma.f32(float, float, float) #0
diff --git a/test/CodeGen/R600/cgp-addressing-modes.ll b/test/CodeGen/R600/cgp-addressing-modes.ll
new file mode 100644
index 0000000..3d36bd1
--- /dev/null
+++ b/test/CodeGen/R600/cgp-addressing-modes.ll
@@ -0,0 +1,242 @@
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=OPT %s
+; RUN: llc -march=amdgcn -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s
+declare i32 #0
+; OPT-LABEL: @test_sink_global_small_offset_i32(
+; OPT-NOT: getelementptr i32, i32 addrspace(1)* %in
+; OPT: br i1
+; OPT: ptrtoint
+; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
+; GCN: {{^}}BB0_2:
+define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+ %tmp1 = load i32, i32 addrspace(1)* %in.gep
+ br label %endif
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+ ret void
+; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
+; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
+; OPT: br i1
+; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
+; GCN: s_and_saveexec_b64
+; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
+; GCN: {{^}}BB1_2:
+; GCN: s_or_b64 exec
+define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
+ %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+ %tmp1 = load i8, i8 addrspace(1)* %in.gep
+ %tmp2 = sext i8 %tmp1 to i32
+ br label %endif
+ %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+ ret void
+; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
+; GCN: s_and_saveexec_b64
+; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
+; GCN: {{^}}BB2_2:
+; GCN: s_or_b64 exec
+define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
+ %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+ %tmp1 = load i8, i8 addrspace(1)* %in.gep
+ %tmp2 = sext i8 %tmp1 to i32
+ br label %endif
+ %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+ ret void
+; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
+; GCN: s_and_saveexec_b64
+; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
+; GCN: {{^}}BB3_2:
+; GCN: s_or_b64 exec
+define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
+ %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+ %tmp1 = load i8, i8 addrspace(1)* %in.gep
+ %tmp2 = sext i8 %tmp1 to i32
+ br label %endif
+ %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+ ret void
+; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
+; OPT: getelementptr i32, i32 addrspace(4)* %in
+; OPT: br i1
+; OPT-NOT: ptrtoint
+; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
+; GCN: flat_load_dword
+; GCN: {{^}}BB4_2:
+define void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
+ %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+ %tmp1 = load i32, i32 addrspace(4)* %in.gep
+ br label %endif
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(4)* %out.gep
+ br label %done
+ ret void
+; OPT-LABEL: @test_sink_scratch_small_offset_i32(
+; OPT-NOT: getelementptr [512 x i32]
+; OPT: br i1
+; OPT: ptrtoint
+; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
+; GCN: s_and_saveexec_b64
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
+; GCN: {{^}}BB5_2:
+define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
+ %alloca = alloca [512 x i32], align 4
+ %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %add.arg = add i32 %arg, 8
+ %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+ store volatile i32 123, i32* %alloca.gep
+ %tmp1 = load volatile i32, i32* %alloca.gep
+ br label %endif
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep.0
+ %load = load volatile i32, i32* %alloca.gep
+ store i32 %load, i32 addrspace(1)* %out.gep.1
+ br label %done
+ ret void
+; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
+; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
+; OPT: br i1
+; OPT-NOT: ptrtoint
+; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
+; GCN: s_and_saveexec_b64
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
+; GCN: {{^}}BB6_2:
+define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
+ %alloca = alloca [512 x i32], align 4
+ %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %add.arg = add i32 %arg, 8
+ %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+ store volatile i32 123, i32* %alloca.gep
+ %tmp1 = load volatile i32, i32* %alloca.gep
+ br label %endif
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep.0
+ %load = load volatile i32, i32* %alloca.gep
+ store i32 %load, i32 addrspace(1)* %out.gep.1
+ br label %done
+ ret void
+; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
+; GCN: s_and_saveexec_b64
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; GCN: {{^}}BB7_2:
+define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) {
+ %offset.ext = zext i32 %offset to i64
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+ %tmp1 = load i32, i32 addrspace(1)* %in.gep
+ br label %endif
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+ ret void
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/R600/coalescer_remat.ll b/test/CodeGen/R600/coalescer_remat.ll
index f78a77b..96730bc 100644
--- a/test/CodeGen/R600/coalescer_remat.ll
+++ b/test/CodeGen/R600/coalescer_remat.ll
@@ -1,5 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs -o - %s | FileCheck %s
-target triple="amdgcn--"
+; RUN: llc -march=amdgcn -verify-machineinstrs -mtriple=amdgcn-- -o - %s | FileCheck %s
declare float @llvm.fma.f32(float, float, float)
@@ -12,7 +11,8 @@ declare float @llvm.fma.f32(float, float, float)
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0
-; CHECK: ; NumVgprs: 12
+; It's probably OK if this is slightly higher:
+; CHECK: ; NumVgprs: 9
define void @foobar(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %flag) {
%cmpflag = icmp eq i32 %flag, 1
diff --git a/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll b/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
index 0aecc18..5851720 100644
--- a/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
+++ b/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
@@ -1,12 +1,10 @@
-; RUN: opt -codegenprepare -S -o - %s | FileCheck --check-prefix=OPT %s
-; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-LLC %s
+; RUN: opt -mtriple=amdgcn-- -codegenprepare -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI-LLC %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
-target triple = "r600--"
-; OPT-LABEL: @test
+; OPT-LABEL: @test(
; OPT: mul nsw i32
; OPT-NEXT: sext
; SI-LLC-LABEL: {{^}}test:
; SI-LLC: s_mul_i32
; SI-LLC-NOT: mul
diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll
index 68ebc4d..addc409 100644
--- a/test/CodeGen/R600/fmul.ll
+++ b/test/CodeGen/R600/fmul.ll
@@ -73,4 +73,20 @@ define void @test_mul_2_k_inv(float addrspace(1)* %out, float %x) #0 {
ret void
+; There should be three multiplies here; %a should be used twice (once
+; negated), not duplicated into mul x, 5.0 and mul x, -5.0.
+; FUNC-LABEL: {{^}}test_mul_twouse:
+; SI: v_mul_f32
+; SI: v_mul_f32
+; SI: v_mul_f32
+; SI-NOT: v_mul_f32
+define void @test_mul_twouse(float addrspace(1)* %out, float %x, float %y) #0 {
+ %a = fmul float %x, 5.0
+ %b = fsub float -0.0, %a
+ %c = fmul float %b, %y
+ %d = fmul float %c, %a
+ store float %d, float addrspace(1)* %out
+ ret void
attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/R600/half.ll b/test/CodeGen/R600/half.ll
index 42ee788..bf8f118 100644
--- a/test/CodeGen/R600/half.ll
+++ b/test/CodeGen/R600/half.ll
@@ -1,62 +1,525 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
-; CHECK-LABEL: {{^}}test_load_store:
-; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
-; CHECK: buffer_store_short [[TMP]]
+; half args should be promoted to float
+; GCN-LABEL: {{^}}load_f16_arg:
+; GCN: s_load_dword [[ARG:s[0-9]+]]
+; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
+; GCN: buffer_store_short [[CVT]]
+define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
+ store half %arg, half addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}load_v2f16_arg:
+; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
+; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
+; GCN-DAG: buffer_store_short [[V0]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_short [[V1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN: s_endpgm
+define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
+ store <2 x half> %arg, <2 x half> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}load_v3f16_arg:
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN-NOT: buffer_load
+; GCN-DAG: buffer_store_dword
+; GCN-DAG: buffer_store_short
+; GCN-NOT: buffer_store
+; GCN: s_endpgm
+define void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
+ store <3 x half> %arg, <3 x half> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}load_v4f16_arg:
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: s_endpgm
+define void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
+ store <4 x half> %arg, <4 x half> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}load_v8f16_arg:
+define void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
+ store <8 x half> %arg, <8 x half> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}extload_v2f16_arg:
+define void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
+ %fpext = fpext <2 x half> %in to <2 x float>
+ store <2 x float> %fpext, <2 x float> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
+define void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
+ %ext = fpext half %arg to float
+ store float %ext, float addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
+define void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
+ %ext = fpext <2 x half> %arg to <2 x float>
+ store <2 x float> %ext, <2 x float> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN-NOT: buffer_load
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN-NOT: v_cvt_f32_f16
+; GCN-DAG: buffer_store_dword
+; GCN-DAG: buffer_store_dwordx2
+; GCN: s_endpgm
+define void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
+ %ext = fpext <3 x half> %arg to <3 x float>
+ store <3 x float> %ext, <3 x float> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
+define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
+ %ext = fpext <4 x half> %arg to <4 x float>
+ store <4 x float> %ext, <4 x float> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
+define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
+ %ext = fpext <8 x half> %arg to <8 x float>
+ store <8 x float> %ext, <8 x float> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
+define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
+ %ext = fpext half %arg to double
+ store double %ext, double addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
+define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
+ %ext = fpext <2 x half> %arg to <2 x double>
+ store <2 x double> %ext, <2 x double> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
+define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
+ %ext = fpext <3 x half> %arg to <3 x double>
+ store <3 x double> %ext, <3 x double> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
+define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
+ %ext = fpext <4 x half> %arg to <4 x double>
+ store <4 x double> %ext, <4 x double> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
+define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
+ %ext = fpext <8 x half> %arg to <8 x double>
+ store <8 x double> %ext, <8 x double> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_load_store_f16:
+; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
+; GCN: buffer_store_short [[TMP]]
+define void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
%val = load half, half addrspace(1)* %in
- store half %val, half addrspace(1) * %out
+ store half %val, half addrspace(1)* %out
ret void
-define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) {
-; CHECK-LABEL: {{^}}test_bitcast_from_half:
-; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
-; CHECK: buffer_store_short [[TMP]]
- %val = load half, half addrspace(1) * %in
- %val_int = bitcast half %val to i16
- store i16 %val_int, i16 addrspace(1)* %out
+; GCN-LABEL: {{^}}global_load_store_v2f16:
+; GCN: buffer_load_dword [[TMP:v[0-9]+]]
+; GCN: buffer_store_dword [[TMP]]
+define void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
+ %val = load <2 x half>, <2 x half> addrspace(1)* %in
+ store <2 x half> %val, <2 x half> addrspace(1)* %out
ret void
-define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) {
-; CHECK-LABEL: {{^}}test_bitcast_to_half:
-; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
-; CHECK: buffer_store_short [[TMP]]
- %val = load i16, i16 addrspace(1)* %in
- %val_fp = bitcast i16 %val to half
- store half %val_fp, half addrspace(1)* %out
+; GCN-LABEL: {{^}}global_load_store_v4f16:
+; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
+; GCN: buffer_store_dwordx2 [[TMP]]
+define void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
+ %val = load <4 x half>, <4 x half> addrspace(1)* %in
+ store <4 x half> %val, <4 x half> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_load_store_v8f16:
+; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
+; GCN: buffer_store_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
+; GCN: s_endpgm
+define void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
+ %val = load <8 x half>, <8 x half> addrspace(1)* %in
+ store <8 x half> %val, <8 x half> addrspace(1)* %out
ret void
-define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
-; CHECK-LABEL: {{^}}test_extend32:
-; CHECK: v_cvt_f32_f16_e32
+; GCN-LABEL: {{^}}global_extload_f16_to_f32:
+; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
+; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
+; GCN: buffer_store_dword [[CVT]]
+define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
+ %val = load half, half addrspace(1)* %in
+ %cvt = fpext half %val to float
+ store float %cvt, float addrspace(1)* %out
+ ret void
- %val16 = load half, half addrspace(1)* %in
- %val32 = fpext half %val16 to float
- store float %val32, float addrspace(1)* %out
+; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
+define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
+ %val = load <2 x half>, <2 x half> addrspace(1)* %in
+ %cvt = fpext <2 x half> %val to <2 x float>
+ store <2 x float> %cvt, <2 x float> addrspace(1)* %out
ret void
-define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
-; CHECK-LABEL: {{^}}test_extend64:
-; CHECK: v_cvt_f32_f16_e32
-; CHECK: v_cvt_f64_f32_e32
+; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
+define void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
+ %val = load <3 x half>, <3 x half> addrspace(1)* %in
+ %cvt = fpext <3 x half> %val to <3 x float>
+ store <3 x float> %cvt, <3 x float> addrspace(1)* %out
+ ret void
- %val16 = load half, half addrspace(1)* %in
- %val64 = fpext half %val16 to double
- store double %val64, double addrspace(1)* %out
+; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
+define void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
+ %val = load <4 x half>, <4 x half> addrspace(1)* %in
+ %cvt = fpext <4 x half> %val to <4 x float>
+ store <4 x float> %cvt, <4 x float> addrspace(1)* %out
ret void
-define void @test_trunc32(float addrspace(1)* %in, half addrspace(1)* %out) {
-; CHECK-LABEL: {{^}}test_trunc32:
-; CHECK: v_cvt_f16_f32_e32
+; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
+define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
+ %val = load <8 x half>, <8 x half> addrspace(1)* %in
+ %cvt = fpext <8 x half> %val to <8 x float>
+ store <8 x float> %cvt, <8 x float> addrspace(1)* %out
+ ret void
- %val32 = load float, float addrspace(1)* %in
- %val16 = fptrunc float %val32 to half
- store half %val16, half addrspace(1)* %out
+; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
+define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
+ %val = load <16 x half>, <16 x half> addrspace(1)* %in
+ %cvt = fpext <16 x half> %val to <16 x float>
+ store <16 x float> %cvt, <16 x float> addrspace(1)* %out
ret void
+; GCN-LABEL: {{^}}global_extload_f16_to_f64:
+; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
+; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
+; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
+; GCN: buffer_store_dwordx2 [[CVT1]]
+define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
+ %val = load half, half addrspace(1)* %in
+ %cvt = fpext half %val to double
+ store double %cvt, double addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
+define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
+ %val = load <2 x half>, <2 x half> addrspace(1)* %in
+ %cvt = fpext <2 x half> %val to <2 x double>
+ store <2 x double> %cvt, <2 x double> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
+define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
+ %val = load <3 x half>, <3 x half> addrspace(1)* %in
+ %cvt = fpext <3 x half> %val to <3 x double>
+ store <3 x double> %cvt, <3 x double> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
+define void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
+ %val = load <4 x half>, <4 x half> addrspace(1)* %in
+ %cvt = fpext <4 x half> %val to <4 x double>
+ store <4 x double> %cvt, <4 x double> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
+define void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
+ %val = load <8 x half>, <8 x half> addrspace(1)* %in
+ %cvt = fpext <8 x half> %val to <8 x double>
+ store <8 x double> %cvt, <8 x double> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
+define void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
+ %val = load <16 x half>, <16 x half> addrspace(1)* %in
+ %cvt = fpext <16 x half> %val to <16 x double>
+ store <16 x double> %cvt, <16 x double> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
+; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
+; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
+; GCN: buffer_store_short [[CVT]]
+define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %val = load float, float addrspace(1)* %in
+ %cvt = fptrunc float %val to half
+ store half %cvt, half addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
+; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
+; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
+; GCN-DAG: buffer_store_short [[CVT0]]
+; GCN-DAG: buffer_store_short [[CVT1]]
+; GCN: s_endpgm
+define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
+ %val = load <2 x float>, <2 x float> addrspace(1)* %in
+ %cvt = fptrunc <2 x float> %val to <2 x half>
+ store <2 x half> %cvt, <2 x half> addrspace(1)* %out
+ ret void
+; FIXME: Shouldn't do 4th conversion
+; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
+; GCN: buffer_load_dwordx4
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: buffer_store_short
+; GCN: buffer_store_dword
+; GCN: s_endpgm
+define void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
+ %val = load <3 x float>, <3 x float> addrspace(1)* %in
+ %cvt = fptrunc <3 x float> %val to <3 x half>
+ store <3 x half> %cvt, <3 x half> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
+; GCN: buffer_load_dwordx4
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: s_endpgm
+define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
+ %val = load <4 x float>, <4 x float> addrspace(1)* %in
+ %cvt = fptrunc <4 x float> %val to <4 x half>
+ store <4 x half> %cvt, <4 x half> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: s_endpgm
+define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
+ %val = load <8 x float>, <8 x float> addrspace(1)* %in
+ %cvt = fptrunc <8 x float> %val to <8 x half>
+ store <8 x half> %cvt, <8 x half> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: v_cvt_f16_f32_e32
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: s_endpgm
+define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
+ %val = load <16 x float>, <16 x float> addrspace(1)* %in
+ %cvt = fptrunc <16 x float> %val to <16 x half>
+ store <16 x half> %cvt, <16 x half> addrspace(1)* %out
+ ret void
+; FIXME: Unsafe math should fold conversions away
+; GCN-LABEL: {{^}}fadd_f16:
+; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
+; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
+; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
+; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
+; SI: v_add_f32
+; GCN: s_endpgm
+define void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
+ %add = fadd half %a, %b
+ store half %add, half addrspace(1)* %out, align 4
+ ret void
+; GCN-LABEL: {{^}}fadd_v2f16:
+; SI: v_add_f32
+; SI: v_add_f32
+; GCN: s_endpgm
+define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
+ %add = fadd <2 x half> %a, %b
+ store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
+ ret void
+; GCN-LABEL: {{^}}fadd_v4f16:
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; GCN: s_endpgm
+define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
+ %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
+ %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
+ %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
+ %result = fadd <4 x half> %a, %b
+ store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
+ ret void
+; GCN-LABEL: {{^}}fadd_v8f16:
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; GCN: s_endpgm
+define void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
+ %add = fadd <8 x half> %a, %b
+ store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
+ ret void
+; GCN-LABEL: {{^}}fsub_f16:
+; GCN: v_subrev_f32_e32
+; GCN: s_endpgm
+define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
+ %b_ptr = getelementptr half, half addrspace(1)* %in, i32 1
+ %a = load half, half addrspace(1)* %in
+ %b = load half, half addrspace(1)* %b_ptr
+ %sub = fsub half %a, %b
+ store half %sub, half addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}test_bitcast_from_half:
+; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
+; GCN: buffer_store_short [[TMP]]
+define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
+ %val = load half, half addrspace(1)* %in
+ %val_int = bitcast half %val to i16
+ store i16 %val_int, i16 addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}test_bitcast_to_half:
+; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
+; GCN: buffer_store_short [[TMP]]
+define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+ %val = load i16, i16 addrspace(1)* %in
+ %val_fp = bitcast i16 %val to half
+ store half %val_fp, half addrspace(1)* %out
+ ret void
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll
index 8917cd6..12eed55 100644
--- a/test/CodeGen/R600/imm.ll
+++ b/test/CodeGen/R600/imm.ll
@@ -36,7 +36,7 @@ define void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) {
; CHECK-LABEL: {{^}}store_inline_imm_neg_0.0_i32:
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @store_inline_imm_neg_0.0_i32(i32 addrspace(1)* %out) {
store i32 -2147483648, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/R600/loop-address.ll
index 7fadb8d..f60d574 100644
--- a/test/CodeGen/R600/loop-address.ll
+++ b/test/CodeGen/R600/loop-address.ll
@@ -1,13 +1,10 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
-target triple = "r600--"
define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) #0 {
%cmp5 = icmp sgt i32 %iterations, 0
diff --git a/test/CodeGen/R600/loop-idiom.ll b/test/CodeGen/R600/loop-idiom.ll
index 810b34f..5fd9806 100644
--- a/test/CodeGen/R600/loop-idiom.ll
+++ b/test/CodeGen/R600/loop-idiom.ll
@@ -2,10 +2,6 @@
; RUN: opt -basicaa -loop-idiom -S < %s -march=amdgcn -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
; RUN: opt -basicaa -loop-idiom -S < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
-target triple = "r600--"
; Make sure loop-idiom doesn't create memcpy or memset. There are no library
; implementations of these for R600.
diff --git a/test/CodeGen/R600/max.ll b/test/CodeGen/R600/max.ll
index 1aa9e68..fef3e2f 100644
--- a/test/CodeGen/R600/max.ll
+++ b/test/CodeGen/R600/max.ll
@@ -115,3 +115,54 @@ define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
+; Make sure the redundant 'and' is removed.
+; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umax_ugt_i16:
+; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_max_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
+; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
+; SI-NEXT: buffer_store_dword [[VMIN]]
+define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
+ %a.ext = zext i16 %a to i32
+ %b.ext = zext i16 %b to i32
+ %cmp = icmp ugt i32 %a.ext, %b.ext
+ %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
+ %mask = and i32 %val, 65535
+ store i32 %mask, i32 addrspace(1)* %out
+ ret void
+; Make sure redundant sign_extend_inreg removed.
+; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
+; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_max_i32 [[MIN:s[0-9]+]], [[A]], [[B]]
+; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
+; SI-NEXT: buffer_store_dword [[VMIN]]
+define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
+ %a.ext = sext i16 %a to i32
+ %b.ext = sext i16 %b to i32
+ %cmp = icmp sgt i32 %a.ext, %b.ext
+ %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
+ %shl = shl i32 %val, 16
+ %sextinreg = ashr i32 %shl, 16
+ store i32 %sextinreg, i32 addrspace(1)* %out
+ ret void
+; FIXME: Should be able to match min/max through extends inserted by
+; legalization.
+; FUNC-LABEL: {{^}}s_test_imin_sge_i16:
+; SI: s_sext_i32_i16
+; SI: s_sext_i32_i16
+; SI: v_cmp_ge_i32_e32
+; SI: v_cndmask_b32
+define void @s_test_imin_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
+ %cmp = icmp sge i16 %a, %b
+ %val = select i1 %cmp, i16 %a, i16 %b
+ store i16 %val, i16 addrspace(1)* %out
+ ret void
diff --git a/test/CodeGen/R600/min.ll b/test/CodeGen/R600/min.ll
index 275e9a7..0332d1a 100644
--- a/test/CodeGen/R600/min.ll
+++ b/test/CodeGen/R600/min.ll
@@ -136,3 +136,54 @@ define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace
store i1 %cmp, i1 addrspace(1)* %outgep1
ret void
+; Make sure the redundant 'and' is removed.
+; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16:
+; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_min_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
+; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
+; SI-NEXT: buffer_store_dword [[VMIN]]
+define void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
+ %a.ext = zext i16 %a to i32
+ %b.ext = zext i16 %b to i32
+ %cmp = icmp ult i32 %a.ext, %b.ext
+ %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
+ %mask = and i32 %val, 65535
+ store i32 %mask, i32 addrspace(1)* %out
+ ret void
+; Make sure redundant sign_extend_inreg removed.
+; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
+; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_min_i32 [[MIN:s[0-9]+]], [[A]], [[B]]
+; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
+; SI-NEXT: buffer_store_dword [[VMIN]]
+define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
+ %a.ext = sext i16 %a to i32
+ %b.ext = sext i16 %b to i32
+ %cmp = icmp slt i32 %a.ext, %b.ext
+ %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
+ %shl = shl i32 %val, 16
+ %sextinreg = ashr i32 %shl, 16
+ store i32 %sextinreg, i32 addrspace(1)* %out
+ ret void
+; FIXME: Should be able to match min/max through extends inserted by
+; legalization.
+; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
+; SI: s_sext_i32_i16
+; SI: s_sext_i32_i16
+; SI: v_cmp_le_i32_e32
+; SI: v_cndmask_b32
+define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
+ %cmp = icmp sle i16 %a, %b
+ %val = select i1 %cmp, i16 %a, i16 %b
+ store i16 %val, i16 addrspace(1)* %out
+ ret void
diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll
index d9ad493..5aedda2 100644
--- a/test/CodeGen/R600/sext-in-reg.ll
+++ b/test/CodeGen/R600/sext-in-reg.ll
@@ -450,13 +450,10 @@ define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x
ret void
-; FIXME: The BFE should really be eliminated. I think it should happen
-; when computeKnownBitsForTargetNode is implemented for imax.
; FUNC-LABEL: {{^}}sext_in_reg_to_illegal_type:
; SI: buffer_load_sbyte
; SI: v_max_i32
-; SI: v_bfe_i32
+; SI-NOT: bfe
; SI: buffer_store_short
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
%tmp5 = load i8, i8 addrspace(1)* %src, align 1
diff --git a/test/CodeGen/R600/si-vector-hang.ll b/test/CodeGen/R600/si-vector-hang.ll
index 94c47fe..bd427dd 100644
--- a/test/CodeGen/R600/si-vector-hang.ll
+++ b/test/CodeGen/R600/si-vector-hang.ll
@@ -11,10 +11,7 @@
; CHECK: buffer_store_byte
; CHECK: buffer_store_byte
; ModuleID = 'radeon'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
-target triple = "r600--"
-; Function Attrs: nounwind
define void @test_8_min_char(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture readonly %in0, i8 addrspace(1)* nocapture readonly %in1) #0 {
%0 = load i8, i8 addrspace(1)* %in0, align 1
diff --git a/test/CodeGen/R600/subreg-eliminate-dead.ll b/test/CodeGen/R600/subreg-eliminate-dead.ll
new file mode 100644
index 0000000..8bd995a
--- /dev/null
+++ b/test/CodeGen/R600/subreg-eliminate-dead.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs -o - %s | FileCheck %s
+; LiveRangeEdit::eliminateDeadDef did not update LiveInterval sub ranges
+; properly.
+; Just make sure this test doesn't crash.
+; CHECK-LABEL: foobar:
+; CHECK: s_endpgm
+define void @foobar() {
+ %v0 = icmp eq <4 x i32> undef, <i32 0, i32 1, i32 2, i32 3>
+ %v3 = sext <4 x i1> %v0 to <4 x i32>
+ %v4 = extractelement <4 x i32> %v3, i32 1
+ %v5 = icmp ne i32 %v4, 0
+ %v6 = select i1 %v5, i32 undef, i32 0
+ %v15 = insertelement <2 x i32> undef, i32 %v6, i32 1
+ store <2 x i32> %v15, <2 x i32> addrspace(1)* undef, align 8
+ ret void
+declare double @llvm.fma.f64(double, double, double)
diff --git a/test/CodeGen/R600/trunc-store-f64-to-f16.ll b/test/CodeGen/R600/trunc-store-f64-to-f16.ll
new file mode 100644
index 0000000..c29872b
--- /dev/null
+++ b/test/CodeGen/R600/trunc-store-f64-to-f16.ll
@@ -0,0 +1,56 @@
+; XFAIL: *
+; RUN: llc -march=amdgcn -mcpu=SI < %s
+; GCN-LABEL: {{^}}global_truncstore_f64_to_f16:
+; GCN: s_endpgm
+define void @global_truncstore_f64_to_f16(half addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %val = load double, double addrspace(1)* %in
+ %cvt = fptrunc double %val to half
+ store half %cvt, half addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_truncstore_v2f64_to_v2f16:
+; GCN: s_endpgm
+define void @global_truncstore_v2f64_to_v2f16(<2 x half> addrspace(1)* %out, <2 x double> addrspace(1)* %in) #0 {
+ %val = load <2 x double>, <2 x double> addrspace(1)* %in
+ %cvt = fptrunc <2 x double> %val to <2 x half>
+ store <2 x half> %cvt, <2 x half> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_truncstore_v3f64_to_v3f16:
+; GCN: s_endpgm
+define void @global_truncstore_v3f64_to_v3f16(<3 x half> addrspace(1)* %out, <3 x double> addrspace(1)* %in) #0 {
+ %val = load <3 x double>, <3 x double> addrspace(1)* %in
+ %cvt = fptrunc <3 x double> %val to <3 x half>
+ store <3 x half> %cvt, <3 x half> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_truncstore_v4f64_to_v4f16:
+; GCN: s_endpgm
+define void @global_truncstore_v4f64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x double> addrspace(1)* %in) #0 {
+ %val = load <4 x double>, <4 x double> addrspace(1)* %in
+ %cvt = fptrunc <4 x double> %val to <4 x half>
+ store <4 x half> %cvt, <4 x half> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_truncstore_v8f64_to_v8f16:
+; GCN: s_endpgm
+define void @global_truncstore_v8f64_to_v8f16(<8 x half> addrspace(1)* %out, <8 x double> addrspace(1)* %in) #0 {
+ %val = load <8 x double>, <8 x double> addrspace(1)* %in
+ %cvt = fptrunc <8 x double> %val to <8 x half>
+ store <8 x half> %cvt, <8 x half> addrspace(1)* %out
+ ret void
+; GCN-LABEL: {{^}}global_truncstore_v16f64_to_v16f16:
+; GCN: s_endpgm
+define void @global_truncstore_v16f64_to_v16f16(<16 x half> addrspace(1)* %out, <16 x double> addrspace(1)* %in) #0 {
+ %val = load <16 x double>, <16 x double> addrspace(1)* %in
+ %cvt = fptrunc <16 x double> %val to <16 x half>
+ store <16 x half> %cvt, <16 x half> addrspace(1)* %out
+ ret void
diff --git a/test/CodeGen/R600/unroll.ll b/test/CodeGen/R600/unroll.ll
index ca8d822..411a15a 100644
--- a/test/CodeGen/R600/unroll.ll
+++ b/test/CodeGen/R600/unroll.ll
@@ -1,7 +1,6 @@
-; RUN: opt -loop-unroll -simplifycfg -sroa %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -loop-unroll -simplifycfg -sroa %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=r600-- -loop-unroll -simplifycfg -sroa %s -S -o - | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
-target triple = "r600--"
; This test contains a simple loop that initializes an array declared in
; private memory. We want to make sure these kinds of loops are always
diff --git a/test/CodeGen/R600/wrong-transalu-pos-fix.ll b/test/CodeGen/R600/wrong-transalu-pos-fix.ll
index 5ab4653..8b383e4 100644
--- a/test/CodeGen/R600/wrong-transalu-pos-fix.ll
+++ b/test/CodeGen/R600/wrong-transalu-pos-fix.ll
@@ -1,14 +1,9 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc -march=r600 -mcpu=redwood -mtriple=r600-- < %s | FileCheck %s
; We want all MULLO_INT inst to be last in their instruction group
;CHECK: {{^}}fill3d:
-; ModuleID = 'radeon'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
-target triple = "r600--"
-; Function Attrs: nounwind
define void @fill3d(i32 addrspace(1)* nocapture %out) #0 {
%x.i = tail call i32 #1
diff --git a/test/CodeGen/Thumb2/constant-islands-jump-table.ll b/test/CodeGen/Thumb2/constant-islands-jump-table.ll
index 0dd7092..5ffe1f9 100644
--- a/test/CodeGen/Thumb2/constant-islands-jump-table.ll
+++ b/test/CodeGen/Thumb2/constant-islands-jump-table.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=thumbv7-linux-gnueabihf -O1 %s -o - | FileCheck %s
; CHECK-LABEL: test_jump_table:
-; CHECK: b .LBB
+; CHECK: b{{.*}} .LBB
; CHECK-NOT: tbh
define i32 @test_jump_table(i32 %x, float %in) {
diff --git a/test/CodeGen/Thumb2/float-ops.ll b/test/CodeGen/Thumb2/float-ops.ll
index 7ec08f8..4c42908 100644
--- a/test/CodeGen/Thumb2/float-ops.ll
+++ b/test/CodeGen/Thumb2/float-ops.ll
@@ -109,7 +109,7 @@ entry:
define double @load_d(double* %a) {
; CHECK-LABEL: load_d:
-; NONE: ldm r0, {r0, r1}
+; NONE: ldrd r0, r1, [r0]
; HARD: vldr d0, [r0]
%0 = load double, double* %a, align 8
ret double %0
@@ -127,9 +127,7 @@ entry:
define void @store_d(double* %a, double %b) {
; CHECK-LABEL: store_d:
-; NONE: mov r1, r3
-; NONE: str r2, [r0]
-; NONE: str r1, [r0, #4]
+; NONE: strd r2, r3, [r0]
; HARD: vstr d0, [r0]
store double %b, double* %a, align 8
ret void
diff --git a/test/CodeGen/Thumb2/thumb2-tbh.ll b/test/CodeGen/Thumb2/thumb2-tbh.ll
index a5a5ed0..0761ed5 100644
--- a/test/CodeGen/Thumb2/thumb2-tbh.ll
+++ b/test/CodeGen/Thumb2/thumb2-tbh.ll
@@ -14,9 +14,19 @@ declare void @Z_fatal(i8*) noreturn nounwind
declare noalias i8* @calloc(i32, i32) nounwind
+; Jump tables are not anchored next to the TBB/TBH any more. Make sure the
+; correct address is still calculated (i.e. via a PC-relative symbol *at* the
+; TBB/TBH).
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
; CHECK-LABEL: main:
-; CHECK: tbb
+; CHECK-NOT: adr {{r[0-9]+}}, LJTI
+; CHECK: [[PCREL_ANCHOR:LCPI[0-9]+_[0-9]+]]:
+; CHECK-NEXT: tbb [pc, {{r[0-9]+}}]
+; CHECK: LJTI0_0:
+; CHECK-NEXT: .data_region jt8
+; CHECK-NEXT: .byte (LBB{{[0-9]+_[0-9]+}}-([[PCREL_ANCHOR]]+4))/2
br label %bb42.i
diff --git a/test/CodeGen/X86/asm-reject-reg-type-mismatch.ll b/test/CodeGen/X86/asm-reject-reg-type-mismatch.ll
new file mode 100644
index 0000000..016e2d2
--- /dev/null
+++ b/test/CodeGen/X86/asm-reject-reg-type-mismatch.ll
@@ -0,0 +1,10 @@
+; RUN: not llc -no-integrated-as %s -o - 2> %t1
+; RUN: FileCheck %s < %t1
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64--"
+; CHECK: error: couldn't allocate output register for constraint '{ax}'
+define i128 @blup() {
+ %v = tail call i128 asm "", "={ax},0,~{dirflag},~{fpsr},~{flags}"(i128 0)
+ ret i128 %v
diff --git a/test/CodeGen/X86/avx-vperm2x128.ll b/test/CodeGen/X86/avx-vperm2x128.ll
index 10ed079..74d20f3 100644
--- a/test/CodeGen/X86/avx-vperm2x128.ll
+++ b/test/CodeGen/X86/avx-vperm2x128.ll
@@ -147,8 +147,8 @@ define <16 x i16> @E5i(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone
; AVX1-LABEL: E5i:
; AVX1: ## BB#0: ## %entry
; AVX1-NEXT: vmovdqa (%rdi), %ymm0
-; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vmovaps (%rsi), %ymm1
+; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
diff --git a/test/CodeGen/X86/avx2-vector-shifts.ll b/test/CodeGen/X86/avx2-vector-shifts.ll
index 8aae90c..5d99269 100644
--- a/test/CodeGen/X86/avx2-vector-shifts.ll
+++ b/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -300,6 +300,56 @@ define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
ret <16 x i16> %shl
+define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
+; CHECK-LABEL: shl_32i8
+; CHECK: vextracti128 $1, %ymm0, %xmm3
+; CHECK-NEXT: vpsllw $4, %xmm3, %xmm2
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm8 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; CHECK-NEXT: vpand %xmm8, %xmm2, %xmm5
+; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
+; CHECK-NEXT: vpsllw $5, %xmm2, %xmm2
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm9 = [224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224]
+; CHECK-NEXT: vpand %xmm9, %xmm2, %xmm7
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; CHECK-NEXT: vpand %xmm7, %xmm2, %xmm4
+; CHECK-NEXT: vpcmpeqb %xmm2, %xmm4, %xmm4
+; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm3, %xmm3
+; CHECK-NEXT: vpsllw $2, %xmm3, %xmm4
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; CHECK-NEXT: vpand %xmm5, %xmm4, %xmm4
+; CHECK-NEXT: vpaddb %xmm7, %xmm7, %xmm7
+; CHECK-NEXT: vpand %xmm7, %xmm2, %xmm6
+; CHECK-NEXT: vpcmpeqb %xmm2, %xmm6, %xmm6
+; CHECK-NEXT: vpblendvb %xmm6, %xmm4, %xmm3, %xmm3
+; CHECK-NEXT: vpaddb %xmm3, %xmm3, %xmm4
+; CHECK-NEXT: vpaddb %xmm7, %xmm7, %xmm6
+; CHECK-NEXT: vpand %xmm6, %xmm2, %xmm6
+; CHECK-NEXT: vpcmpeqb %xmm2, %xmm6, %xmm6
+; CHECK-NEXT: vpblendvb %xmm6, %xmm4, %xmm3, %xmm3
+; CHECK-NEXT: vpsllw $4, %xmm0, %xmm4
+; CHECK-NEXT: vpand %xmm8, %xmm4, %xmm4
+; CHECK-NEXT: vpsllw $5, %xmm1, %xmm1
+; CHECK-NEXT: vpand %xmm9, %xmm1, %xmm1
+; CHECK-NEXT: vpand %xmm1, %xmm2, %xmm6
+; CHECK-NEXT: vpcmpeqb %xmm2, %xmm6, %xmm6
+; CHECK-NEXT: vpblendvb %xmm6, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vpsllw $2, %xmm0, %xmm4
+; CHECK-NEXT: vpand %xmm5, %xmm4, %xmm4
+; CHECK-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpand %xmm1, %xmm2, %xmm5
+; CHECK-NEXT: vpcmpeqb %xmm2, %xmm5, %xmm5
+; CHECK-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm0, %xmm0, %xmm4
+; CHECK-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpand %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %shl = shl <32 x i8> %r, %a
+ ret <32 x i8> %shl
define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-LABEL: ashr_8i16
; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
@@ -329,6 +379,176 @@ define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
ret <16 x i16> %ashr
+define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
+; CHECK-LABEL: ashr_32i8
+; CHECK: vextracti128 $1, %ymm1, %xmm2
+; CHECK-NEXT: vpextrb $1, %xmm2, %ecx
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3
+; CHECK-NEXT: vpextrb $1, %xmm3, %eax
+; CHECK-NEXT: sarb %cl, %al
+; CHECK-NEXT: vpextrb $0, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $0, %xmm3, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: movzbl %dl, %edx
+; CHECK-NEXT: vpextrb $2, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $2, %xmm3, %esi
+; CHECK-NEXT: sarb %cl, %sil
+; CHECK-NEXT: vmovd %edx, %xmm4
+; CHECK-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: vpextrb $3, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $3, %xmm3, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
+; CHECK-NEXT: vpextrb $4, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $4, %xmm3, %eax
+; CHECK-NEXT: sarb %cl, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
+; CHECK-NEXT: vpextrb $5, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $5, %xmm3, %eax
+; CHECK-NEXT: sarb %cl, %al
+; CHECK-NEXT: vpextrb $6, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $6, %xmm3, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpextrb $7, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $7, %xmm3, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
+; CHECK-NEXT: vpextrb $8, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $8, %xmm3, %eax
+; CHECK-NEXT: sarb %cl, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
+; CHECK-NEXT: vpextrb $9, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $9, %xmm3, %eax
+; CHECK-NEXT: sarb %cl, %al
+; CHECK-NEXT: vpextrb $10, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $10, %xmm3, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpextrb $11, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $11, %xmm3, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
+; CHECK-NEXT: vpextrb $12, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $12, %xmm3, %eax
+; CHECK-NEXT: sarb %cl, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
+; CHECK-NEXT: vpextrb $13, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $13, %xmm3, %eax
+; CHECK-NEXT: sarb %cl, %al
+; CHECK-NEXT: vpextrb $14, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $14, %xmm3, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
+; CHECK-NEXT: vpextrb $15, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $15, %xmm3, %eax
+; CHECK-NEXT: sarb %cl, %al
+; CHECK-NEXT: vpextrb $1, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $1, %xmm0, %esi
+; CHECK-NEXT: sarb %cl, %sil
+; CHECK-NEXT: movzbl %dl, %ecx
+; CHECK-NEXT: vpinsrb $14, %ecx, %xmm4, %xmm2
+; CHECK-NEXT: vpextrb $0, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $0, %xmm0, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: vpextrb $2, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $2, %xmm0, %edi
+; CHECK-NEXT: sarb %cl, %dil
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: movzbl %dl, %ecx
+; CHECK-NEXT: vmovd %ecx, %xmm3
+; CHECK-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: vpextrb $3, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $3, %xmm0, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; CHECK-NEXT: vpextrb $4, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $4, %xmm0, %eax
+; CHECK-NEXT: sarb %cl, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; CHECK-NEXT: vpextrb $5, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $5, %xmm0, %eax
+; CHECK-NEXT: sarb %cl, %al
+; CHECK-NEXT: vpextrb $6, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $6, %xmm0, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpextrb $7, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $7, %xmm0, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; CHECK-NEXT: vpextrb $8, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $8, %xmm0, %eax
+; CHECK-NEXT: sarb %cl, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; CHECK-NEXT: vpextrb $9, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $9, %xmm0, %eax
+; CHECK-NEXT: sarb %cl, %al
+; CHECK-NEXT: vpextrb $10, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $10, %xmm0, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpextrb $11, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $11, %xmm0, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; CHECK-NEXT: vpextrb $12, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $12, %xmm0, %eax
+; CHECK-NEXT: sarb %cl, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; CHECK-NEXT: vpextrb $13, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $13, %xmm0, %eax
+; CHECK-NEXT: sarb %cl, %al
+; CHECK-NEXT: vpextrb $14, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $14, %xmm0, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpextrb $15, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $15, %xmm0, %edx
+; CHECK-NEXT: sarb %cl, %dl
+; CHECK-NEXT: vpinsrb $14, %eax, %xmm3, %xmm0
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %ashr = ashr <32 x i8> %r, %a
+ ret <32 x i8> %ashr
define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-LABEL: lshr_8i16
; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
@@ -357,3 +577,173 @@ define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
%lshr = lshr <16 x i16> %r, %a
ret <16 x i16> %lshr
+define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
+; CHECK-LABEL: lshr_32i8
+; CHECK: vextracti128 $1, %ymm1, %xmm2
+; CHECK-NEXT: vpextrb $1, %xmm2, %ecx
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3
+; CHECK-NEXT: vpextrb $1, %xmm3, %eax
+; CHECK-NEXT: shrb %cl, %al
+; CHECK-NEXT: vpextrb $0, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $0, %xmm3, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: movzbl %dl, %edx
+; CHECK-NEXT: vpextrb $2, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $2, %xmm3, %esi
+; CHECK-NEXT: shrb %cl, %sil
+; CHECK-NEXT: vmovd %edx, %xmm4
+; CHECK-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: vpextrb $3, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $3, %xmm3, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
+; CHECK-NEXT: vpextrb $4, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $4, %xmm3, %eax
+; CHECK-NEXT: shrb %cl, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
+; CHECK-NEXT: vpextrb $5, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $5, %xmm3, %eax
+; CHECK-NEXT: shrb %cl, %al
+; CHECK-NEXT: vpextrb $6, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $6, %xmm3, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpextrb $7, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $7, %xmm3, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
+; CHECK-NEXT: vpextrb $8, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $8, %xmm3, %eax
+; CHECK-NEXT: shrb %cl, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
+; CHECK-NEXT: vpextrb $9, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $9, %xmm3, %eax
+; CHECK-NEXT: shrb %cl, %al
+; CHECK-NEXT: vpextrb $10, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $10, %xmm3, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpextrb $11, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $11, %xmm3, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
+; CHECK-NEXT: vpextrb $12, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $12, %xmm3, %eax
+; CHECK-NEXT: shrb %cl, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
+; CHECK-NEXT: vpextrb $13, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $13, %xmm3, %eax
+; CHECK-NEXT: shrb %cl, %al
+; CHECK-NEXT: vpextrb $14, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $14, %xmm3, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
+; CHECK-NEXT: vpextrb $15, %xmm2, %ecx
+; CHECK-NEXT: vpextrb $15, %xmm3, %eax
+; CHECK-NEXT: shrb %cl, %al
+; CHECK-NEXT: vpextrb $1, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $1, %xmm0, %esi
+; CHECK-NEXT: shrb %cl, %sil
+; CHECK-NEXT: movzbl %dl, %ecx
+; CHECK-NEXT: vpinsrb $14, %ecx, %xmm4, %xmm2
+; CHECK-NEXT: vpextrb $0, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $0, %xmm0, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: vpextrb $2, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $2, %xmm0, %edi
+; CHECK-NEXT: shrb %cl, %dil
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: movzbl %dl, %ecx
+; CHECK-NEXT: vmovd %ecx, %xmm3
+; CHECK-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: vpextrb $3, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $3, %xmm0, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; CHECK-NEXT: vpextrb $4, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $4, %xmm0, %eax
+; CHECK-NEXT: shrb %cl, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; CHECK-NEXT: vpextrb $5, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $5, %xmm0, %eax
+; CHECK-NEXT: shrb %cl, %al
+; CHECK-NEXT: vpextrb $6, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $6, %xmm0, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpextrb $7, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $7, %xmm0, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; CHECK-NEXT: vpextrb $8, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $8, %xmm0, %eax
+; CHECK-NEXT: shrb %cl, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; CHECK-NEXT: vpextrb $9, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $9, %xmm0, %eax
+; CHECK-NEXT: shrb %cl, %al
+; CHECK-NEXT: vpextrb $10, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $10, %xmm0, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpextrb $11, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $11, %xmm0, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; CHECK-NEXT: vpextrb $12, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $12, %xmm0, %eax
+; CHECK-NEXT: shrb %cl, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; CHECK-NEXT: vpextrb $13, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $13, %xmm0, %eax
+; CHECK-NEXT: shrb %cl, %al
+; CHECK-NEXT: vpextrb $14, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $14, %xmm0, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpextrb $15, %xmm1, %ecx
+; CHECK-NEXT: vpextrb $15, %xmm0, %edx
+; CHECK-NEXT: shrb %cl, %dl
+; CHECK-NEXT: vpinsrb $14, %eax, %xmm3, %xmm0
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %lshr = lshr <32 x i8> %r, %a
+ ret <32 x i8> %lshr
diff --git a/test/CodeGen/X86/avx512-build-vector.ll b/test/CodeGen/X86/avx512-build-vector.ll
index 8373c6d..e70d9f3 100644
--- a/test/CodeGen/X86/avx512-build-vector.ll
+++ b/test/CodeGen/X86/avx512-build-vector.ll
@@ -2,13 +2,9 @@
define <16 x i32> @test1(i32* %x) {
; CHECK-LABEL: test1:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vmovd (%rdi), %xmm0
-; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4],ymm1[5,6,7]
-; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; CHECK-NEXT: retq
+; CHECK: vmovd (%rdi), %xmm
+; CHECK: vmovdqa32
+; CHECK: vpermt2d %zmm
%y = load i32, i32* %x, align 4
%res = insertelement <16 x i32>zeroinitializer, i32 %y, i32 4
ret <16 x i32>%res
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index 471e34c..9387192 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -98,18 +98,55 @@ define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
declare <4 x float><4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
+ ; CHECK-LABEL: test_sqrt_pd_512
; CHECK: vsqrtpd
- %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1]
+ %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
ret <8 x double> %res
-declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
+ ; CHECK-LABEL: test_sqrt_ps_512
; CHECK: vsqrtps
- %res = call <16 x float><16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1]
+ %res = call <16 x float><16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
-declare <16 x float><16 x float>, <16 x float>, i16, i32) nounwind readnone
+define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
+ ; CHECK-LABEL: test_sqrt_round_ps_512
+ ; CHECK: vsqrtps {rz-sae}
+ %res = call <16 x float><16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
+ ret <16 x float> %res
+declare <16 x float><16 x float>, <16 x float>, i16, i32) nounwind readnone
+define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
+ ; CHECK-LABEL: test_getexp_pd_512
+ ; CHECK: vgetexppd
+ %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
+ ret <8 x double> %res
+define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
+ ; CHECK-LABEL: test_getexp_round_pd_512
+ ; CHECK: vgetexppd {sae}
+ %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
+ ret <8 x double> %res
+declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
+define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
+ ; CHECK-LABEL: test_getexp_ps_512
+ ; CHECK: vgetexpps
+ %res = call <16 x float><16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
+ ret <16 x float> %res
+define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
+ ; CHECK-LABEL: test_getexp_round_ps_512
+ ; CHECK: vgetexpps {sae}
+ %res = call <16 x float><16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
+ ret <16 x float> %res
+declare <16 x float><16 x float>, <16 x float>, i16, i32) nounwind readnone
define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vsqrtss {{.*}}encoding: [0x62
diff --git a/test/CodeGen/X86/avx512-shuffle.ll b/test/CodeGen/X86/avx512-shuffle.ll
new file mode 100644
index 0000000..2683d6f
--- /dev/null
+++ b/test/CodeGen/X86/avx512-shuffle.ll
@@ -0,0 +1,336 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK-SKX
+; CHECK-LABEL: test1:
+; CHECK: vpermps
+; CHECK: ret
+define <16 x float> @test1(<16 x float> %a) nounwind {
+ %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
+ ret <16 x float> %c
+; CHECK-LABEL: test2:
+; CHECK: vpermd
+; CHECK: ret
+define <16 x i32> @test2(<16 x i32> %a) nounwind {
+ %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
+ ret <16 x i32> %c
+; CHECK-LABEL: test3:
+; CHECK: vpermq
+; CHECK: ret
+define <8 x i64> @test3(<8 x i64> %a) nounwind {
+ %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 5, i32 1, i32 undef, i32 7, i32 undef, i32 3, i32 1>
+ ret <8 x i64> %c
+; CHECK-LABEL: test4:
+; CHECK: vpermpd
+; CHECK: ret
+define <8 x double> @test4(<8 x double> %a) nounwind {
+ %c = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x double> %c
+; CHECK-LABEL: test5:
+; CHECK: vpermt2pd
+; CHECK: ret
+define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind {
+ %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
+ ret <8 x double> %c
+; CHECK-LABEL: test6:
+; CHECK: vpermq $30
+; CHECK: ret
+define <8 x i64> @test6(<8 x i64> %a) nounwind {
+ %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
+ ret <8 x i64> %c
+; CHECK-LABEL: test7:
+; CHECK: vpermt2q
+; CHECK: ret
+define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
+ %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
+ ret <8 x i64> %c
+; CHECK-LABEL: test8:
+; CHECK: vpermt2d
+; CHECK: ret
+define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
+ %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+ ret <16 x i32> %c
+; CHECK-LABEL: test9:
+; CHECK: vpermt2ps
+; CHECK: ret
+define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
+ %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+ ret <16 x float> %c
+; CHECK-LABEL: test10:
+; CHECK: vpermt2ps (
+; CHECK: ret
+define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind {
+ %c = load <16 x float>, <16 x float>* %b
+ %d = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+ ret <16 x float> %d
+; CHECK-LABEL: test11:
+; CHECK: vpermt2d
+; CHECK: ret
+define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind {
+ %c = load <16 x i32>, <16 x i32>* %b
+ %d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+ ret <16 x i32> %d
+; CHECK-LABEL: test13
+; CHECK: vpermilps $177, %zmm
+; CHECK: ret
+define <16 x float> @test13(<16 x float> %a) {
+ %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+ ret <16 x float> %b
+; CHECK-LABEL: test14
+; CHECK: vpermilpd $203, %zmm
+; CHECK: ret
+define <8 x double> @test14(<8 x double> %a) {
+ %b = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32><i32 1, i32 1, i32 2, i32 3, i32 4, i32 4, i32 7, i32 7>
+ ret <8 x double> %b
+; CHECK-LABEL: test15
+; CHECK: vpshufd $177, %zmm
+; CHECK: ret
+define <16 x i32> @test15(<16 x i32> %a) {
+; mask 1-0-3-2 = 10110001 = 0xb1 = 177
+ %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+ ret <16 x i32> %b
+; CHECK-LABEL: test16
+; CHECK: valignq $2, %zmm0, %zmm1
+; CHECK: ret
+define <8 x double> @test16(<8 x double> %a, <8 x double> %b) nounwind {
+ %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+ ret <8 x double> %c
+; CHECK-LABEL: test17
+; CHECK: vshufpd $19, %zmm1, %zmm0
+; CHECK: ret
+define <8 x double> @test17(<8 x double> %a, <8 x double> %b) nounwind {
+ %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 9, i32 2, i32 10, i32 5, i32 undef, i32 undef, i32 undef>
+ ret <8 x double> %c
+; CHECK-LABEL: test18
+; CHECK: vpunpckhdq %zmm
+; CHECK: ret
+define <16 x i32> @test18(<16 x i32> %a, <16 x i32> %c) {
+ %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i32> %b
+; CHECK-LABEL: test19
+; CHECK: vpunpckldq %zmm
+; CHECK: ret
+define <16 x i32> @test19(<16 x i32> %a, <16 x i32> %c) {
+ %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
+ ret <16 x i32> %b
+; CHECK-LABEL: test20
+; CHECK: vpunpckhqdq %zmm
+; CHECK: ret
+define <8 x i64> @test20(<8 x i64> %a, <8 x i64> %c) {
+ %b = shufflevector <8 x i64> %a, <8 x i64> %c, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ ret <8 x i64> %b
+; CHECK-LABEL: test21
+; CHECK: vbroadcastsd %xmm0, %zmm
+; CHECK: ret
+define <8 x double> @test21(<8 x double> %a, <8 x double> %b) {
+ %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <8 x double> %shuffle
+; CHECK-LABEL: test22
+; CHECK: vpbroadcastq %xmm0, %zmm
+; CHECK: ret
+define <8 x i64> @test22(<8 x i64> %a, <8 x i64> %b) {
+ %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <8 x i64> %shuffle
+; CHECK-LABEL: @test23
+; CHECK: vshufps
+; CHECK: vshufps
+; CHECK: ret
+define <16 x i32> @test23(<16 x i32> %a, <16 x i32> %b) nounwind {
+ %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i32> %c
+; CHECK-LABEL: @test24
+; CHECK: vpermt2d
+; CHECK: ret
+define <16 x i32> @test24(<16 x i32> %a, <16 x i32> %b) nounwind {
+ %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 25, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i32> %c
+; CHECK-LABEL: @test25
+; CHECK: vshufps $52
+; CHECK: ret
+define <16 x i32> @test25(<16 x i32> %a, <16 x i32> %b) nounwind {
+; mask - 0-1-3-0 00110100 = 0x34 = 52
+ %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 19, i32 16, i32 4, i32 5, i32 23, i32 undef, i32 8, i32 9, i32 27, i32 undef, i32 12, i32 13, i32 undef, i32 undef>
+ ret <16 x i32> %c
+; CHECK-LABEL: @test26
+; CHECK: vmovshdup
+; CHECK: ret
+define <16 x i32> @test26(<16 x i32> %a) nounwind {
+ %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 undef, i32 9, i32 9, i32 undef, i32 11, i32 13, i32 undef, i32 undef, i32 undef>
+ ret <16 x i32> %c
+; CHECK-LABEL: @test27
+; CHECK: ret
+define <16 x i32> @test27(<4 x i32>%a) {
+ %res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i32> %res
+; CHECK-LABEL: test28
+; CHECK: vpshufhw $177, %ymm
+; CHECK: ret
+define <16 x i16> @test28(<16 x i16> %a) {
+ %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32><i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 12, i32 15, i32 14>
+ ret <16 x i16> %b
+; CHECK-LABEL: test29
+; CHECK: vunpcklps %zmm
+; CHECK: ret
+define <16 x float> @test29(<16 x float> %a, <16 x float> %c) {
+ %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
+ ret <16 x float> %b
+; CHECK-LABEL: @test30
+; CHECK: vshufps $144, %zmm
+; CHECK: ret
+define <16 x float> @test30(<16 x float> %a, <16 x float> %c) {
+ %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 0, i32 17, i32 18, i32 4, i32 4, i32 21, i32 22, i32 8, i32 8, i32 25, i32 26, i32 12, i32 12, i32 29, i32 30>
+ ret <16 x float> %b
+; CHECK-LABEL: test31
+; CHECK: valignd $3, %zmm0, %zmm1
+; CHECK: ret
+define <16 x i32> @test31(<16 x i32> %a, <16 x i32> %b) nounwind {
+ %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+ ret <16 x i32> %c
+; CHECK-LABEL: test32
+; CHECK: vshufpd $99, %zmm0, %zmm1
+; CHECK: ret
+define <8 x double> @test32(<8 x double> %a, <8 x double> %b) nounwind {
+ %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 1, i32 10, i32 2, i32 undef, i32 5, i32 15, i32 undef>
+ ret <8 x double> %c
+define <16 x i32> @test_align_v16i32_rr(<16 x i32> %a, <16 x i32> %b) nounwind {
+; CHECK-LABEL: test_align_v16i32_rr:
+; CHECK: ## BB#0:
+; CHECK-NEXT: valignd $3, %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+ ret <16 x i32> %c
+define <16 x i32> @test_align_v16i32_rm(<16 x i32>* %a.ptr, <16 x i32> %b) nounwind {
+; CHECK-LABEL: test_align_v16i32_rm:
+; CHECK: ## BB#0:
+; CHECK-NEXT: valignd $3, (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %a = load <16 x i32>, <16 x i32>* %a.ptr
+ %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+ ret <16 x i32> %c
+define <16 x i32> @test_align_v16i32_rm_mask(<16 x i32>* %a.ptr, <16 x i32> %b, <16 x i1> %mask) nounwind {
+; CHECK-LABEL: test_align_v16i32_rm_mask:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxbd %xmm1, %zmm1
+; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm1, %zmm1
+; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
+; CHECK-NEXT: vmovdqa32 (%rdi), %zmm1
+; CHECK-NEXT: valignd $3, %zmm1, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+; CHECK-SKX-LABEL: test_align_v16i32_rm_mask:
+; CHECK-SKX: ## BB#0:
+; CHECK-SKX-NEXT: vpmovb2m %xmm1, %k1
+; CHECK-SKX-NEXT: vmovdqa32 (%rdi), %zmm1
+; CHECK-SKX-NEXT: valignd $3, %zmm1, %zmm0, %zmm1 {%k1}
+; CHECK-SKX-NEXT: vmovaps %zmm1, %zmm0
+ %a = load <16 x i32>, <16 x i32>* %a.ptr
+ %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+ %res = select <16 x i1> %mask,<16 x i32> %c, <16 x i32> %a
+ ret <16 x i32> %res
+define <8 x double> @test_align_v8f64_rr(<8 x double> %a, <8 x double> %b) nounwind {
+; CHECK-LABEL: test_align_v8f64_rr:
+; CHECK: ## BB#0:
+; CHECK-NEXT: valignq $3, %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+ ret <8 x double> %c
+define <8 x double> @test_align_v18f64_rm(<8 x double>* %a.ptr, <8 x double> %b) nounwind {
+; CHECK-LABEL: test_align_v18f64_rm:
+; CHECK: ## BB#0:
+; CHECK-NEXT: valignq $3, (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %a = load <8 x double>, <8 x double>* %a.ptr
+ %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+ ret <8 x double> %c
+define <8 x double> @test_align_v18f64_rm_mask(<8 x double>* %a.ptr, <8 x double> %b, <8 x i1> %mask) nounwind {
+; CHECK-LABEL: test_align_v18f64_rm_mask:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxwq %xmm1, %zmm1
+; CHECK-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
+; CHECK-NEXT: valignq $3, (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+; CHECK-SKX-LABEL: test_align_v18f64_rm_mask:
+; CHECK-SKX: ## BB#0:
+; CHECK-SKX-NEXT: vpmovw2m %xmm1, %k1
+; CHECK-SKX-NEXT: valignq $3, (%rdi), %zmm0, %zmm0 {%k1} {z}
+ %a = load <8 x double>, <8 x double>* %a.ptr
+ %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+ %res = select <8 x i1> %mask,<8 x double> %c, <8 x double> zeroinitializer
+ ret <8 x double> %res
diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll
index e1f6276..04028a1 100644
--- a/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -116,11 +116,8 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; KNL-LABEL: test9:
; KNL: ## BB#0:
-; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
-; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
-; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
; KNL-NEXT: retq
%mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
@@ -130,11 +127,8 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; KNL-LABEL: test10:
; KNL: ## BB#0:
-; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
-; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
-; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
; KNL-NEXT: retq
; SKX-LABEL: test10:
; SKX: ## BB#0:
@@ -166,7 +160,6 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
; KNL-NEXT: kunpckbw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: ## kill: AX<def> AX<kill> EAX<kill>
; KNL-NEXT: retq
%res = icmp eq <16 x i64> %a, %b
%res1 = bitcast <16 x i1> %res to i16
diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll
index fd76ed5..9d96c27 100644
--- a/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -668,7 +668,7 @@ declare <4 x float><4 x float> %data, <4
; CHECK-LABEL: compr7
; CHECK-NOT: vcompress
-; CHECK: vmovapd
+; CHECK: vmovupd
define void @compr7(i8* %addr, <8 x double> %data) {
call void* %addr, <8 x double> %data, i8 -1)
ret void
@@ -757,7 +757,7 @@ declare <4 x float><4 x float> %data, <4 x
; CHECK-LABEL: expand7
; CHECK-NOT: vexpand
-; CHECK: vmovapd
+; CHECK: vmovupd
define <8 x double> @expand7(i8* %addr, <8 x double> %data) {
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret <8 x double> %res
@@ -2552,4 +2552,38 @@ define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %
%res = call <4 x float><4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
ret <4 x float> %res
-declare <4 x float><4 x float>, <4 x float>, <4 x float>, i8)
\ No newline at end of file
+declare <4 x float><4 x float>, <4 x float>, <4 x float>, i8)
+define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
+ ; CHECK-LABEL: test_sqrt_pd_256
+ ; CHECK: vsqrtpd
+ %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
+ ret <4 x double> %res
+declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
+define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) {
+ ; CHECK-LABEL: test_sqrt_ps_256
+ ; CHECK: vsqrtps
+ %res = call <8 x float><8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+declare <8 x float><8 x float>, <8 x float>, i8) nounwind readnone
+define <4 x double> @test_getexp_pd_256(<4 x double> %a0) {
+ ; CHECK-LABEL: test_getexp_pd_256
+ ; CHECK: vgetexppd
+ %res = call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
+ ret <4 x double> %res
+declare <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
+define <8 x float> @test_getexp_ps_256(<8 x float> %a0) {
+ ; CHECK-LABEL: test_getexp_ps_256
+ ; CHECK: vgetexpps
+ %res = call <8 x float><8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+declare <8 x float><8 x float>, <8 x float>, i8) nounwind readnone
\ No newline at end of file
diff --git a/test/CodeGen/X86/buildvec-insertvec.ll b/test/CodeGen/X86/buildvec-insertvec.ll
index 3fb69a4..73dbe1f 100644
--- a/test/CodeGen/X86/buildvec-insertvec.ll
+++ b/test/CodeGen/X86/buildvec-insertvec.ll
@@ -1,15 +1,56 @@
-; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s
define void @foo(<3 x float> %in, <4 x i8>* nocapture %out) nounwind {
+; CHECK-LABEL: foo:
+; CHECK: # BB#0:
+; CHECK-NEXT: cvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: movl $255, %eax
+; CHECK-NEXT: pinsrd $3, %eax, %xmm0
+; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; CHECK-NEXT: movd %xmm0, (%rdi)
+; CHECK-NEXT: retq
%t0 = fptoui <3 x float> %in to <3 x i8>
%t1 = shufflevector <3 x i8> %t0, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%t2 = insertelement <4 x i8> %t1, i8 -1, i32 3
store <4 x i8> %t2, <4 x i8>* %out, align 4
ret void
-; CHECK: foo
-; CHECK: cvttps2dq
-; CHECK-NOT: pextrd
-; CHECK: pinsrd
-; CHECK-NEXT: pshufb
-; CHECK: ret
+; Verify that the DAGCombiner doesn't wrongly fold a build_vector into a
+; blend with a zero vector if the build_vector contains negative zero.
+; TODO: the codegen for function 'test_negative_zero_1' is sub-optimal.
+; Ideally, we should generate a single shuffle blend operation.
+define <4 x float> @test_negative_zero_1(<4 x float> %A) {
+; CHECK-LABEL: test_negative_zero_1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movapd %xmm0, %xmm1
+; CHECK-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; CHECK-NEXT: xorps %xmm2, %xmm2
+; CHECK-NEXT: blendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT: retq
+ %0 = extractelement <4 x float> %A, i32 0
+ %1 = insertelement <4 x float> undef, float %0, i32 0
+ %2 = insertelement <4 x float> %1, float -0.0, i32 1
+ %3 = extractelement <4 x float> %A, i32 2
+ %4 = insertelement <4 x float> %2, float %3, i32 2
+ %5 = insertelement <4 x float> %4, float 0.0, i32 3
+ ret <4 x float> %5
+define <2 x double> @test_negative_zero_2(<2 x double> %A) {
+; CHECK-LABEL: test_negative_zero_2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movhpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %0 = extractelement <2 x double> %A, i32 0
+ %1 = insertelement <2 x double> undef, double %0, i32 0
+ %2 = insertelement <2 x double> %1, double -0.0, i32 1
+ ret <2 x double> %2
diff --git a/test/CodeGen/X86/critical-anti-dep-breaker.ll b/test/CodeGen/X86/critical-anti-dep-breaker.ll
index 86afc1f..de5744d 100644
--- a/test/CodeGen/X86/critical-anti-dep-breaker.ll
+++ b/test/CodeGen/X86/critical-anti-dep-breaker.ll
@@ -11,8 +11,7 @@
@NullToken = external global i64
; CHECK-LABEL: Part_Create:
-; CHECK-DAG: # kill: RDI<def>
-; CHECK-DAG: movq PartClass@GOTPCREL(%rip), %r10
+; CHECK: movq PartClass@GOTPCREL(%rip), %r10
define i32 @Part_Create(i64* %Anchor, i32 %TypeNum, i32 %F, i32 %Z, i32* %Status, i64* %PartTkn) {
%PartObj = alloca i64*, align 8
%Vchunk = alloca i64, align 8
diff --git a/test/CodeGen/X86/machine-cp.ll b/test/CodeGen/X86/machine-cp.ll
index 0006b6e..aaed0f0 100644
--- a/test/CodeGen/X86/machine-cp.ll
+++ b/test/CodeGen/X86/machine-cp.ll
@@ -58,3 +58,58 @@ while.end: ; preds = %while.body, %entry
%t = trunc i64 %a.addr.0.lcssa to i32
ret i32 %t
+; Check that copy propagation does not kill things like:
+; dst = copy src <-- do not kill that.
+; ... = op1 dst<undef>
+; ... = op2 dst <-- this is used here.
+; CHECK-LABEL: foo:
+; CHECK: psllw $7,
+; CHECK: psllw $7,
+; CHECK-NEXT: pand
+; CHECK-NEXT: pcmpgtb
+; CHECK-NEXT: pand %xmm{{[0-9]+}}, [[SRC:%xmm[0-9]+]]
+; Machine propagation used to delete the first copy as the
+; first few uses were <undef>.
+; CHECK-NEXT: movdqa [[SRC]], [[CPY1:%xmm[0-9]+]]
+; CHECK-NEXT: movdqa [[SRC]], [[CPY2:%xmm[0-9]+]]
+; CHECK-NEXT: punpckhbw [[SRC]],
+; Check that CPY1 is not redefined.
+; CHECK-NOT: , [[CPY1]]
+; undef use, we do not care.
+; CHECK: punpcklwd [[CPY1]],
+; Check that CPY1 is not redefined.
+; CHECK-NOT: , [[CPY1]]
+; CHECK: punpcklbw [[CPY2]], [[CPY2]]
+; CHECK-NEXT: punpckhwd [[CPY2]], [[CPY2]]
+; CHECK-NEXT pslld $31, [[CPY2]]
+; Check that CPY1 is not redefined.
+; CHECK-NOT: , [[CPY1]]
+; CHECK: punpcklbw [[CPY1]], [[CPY1]]
+; CHECK-NEXT: punpcklwd [[CPY1]], [[CPY1]]
+; CHECK-NEXT pslld $31, [[CPY1]]
+define <16 x float> @foo(<16 x float> %x) {
+ %v3 = icmp slt <16 x i32> undef, zeroinitializer
+ %v14 = zext <16 x i1> %v3 to <16 x i32>
+ %v16 = fcmp olt <16 x float> %x, zeroinitializer
+ %v17 = sext <16 x i1> %v16 to <16 x i32>
+ %v18 = zext <16 x i1> %v16 to <16 x i32>
+ %v19 = xor <16 x i32> %v14, %v18
+ %v20 = or <16 x i32> %v17, undef
+ %v21 = fptosi <16 x float> %x to <16 x i32>
+ %v22 = sitofp <16 x i32> %v21 to <16 x float>
+ %v69 = fcmp ogt <16 x float> %v22, zeroinitializer
+ %v75 = and <16 x i1> %v69, %v3
+ %v77 = bitcast <16 x float> %v22 to <16 x i32>
+ %v79 = sext <16 x i1> %v75 to <16 x i32>
+ %v80 = and <16 x i32> undef, %v79
+ %v81 = xor <16 x i32> %v77, %v80
+ %v82 = and <16 x i32> undef, %v81
+ %v83 = xor <16 x i32> %v19, %v82
+ %v84 = and <16 x i32> %v83, %v20
+ %v85 = xor <16 x i32> %v19, %v84
+ %v86 = bitcast <16 x i32> %v85 to <16 x float>
+ ret <16 x float> %v86
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index d543deb..73be234 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -196,9 +196,11 @@ bb12:
; LINUX: .long .LBB7_2@GOTOFF
; LINUX: .long .LBB7_8@GOTOFF
-; LINUX: .long .LBB7_14@GOTOFF
-; LINUX: .long .LBB7_9@GOTOFF
-; LINUX: .long .LBB7_10@GOTOFF
+; LINUX: .long .LBB7_4@GOTOFF
+; LINUX: .long .LBB7_6@GOTOFF
+; LINUX: .long .LBB7_5@GOTOFF
+; LINUX: .long .LBB7_8@GOTOFF
+; LINUX: .long .LBB7_7@GOTOFF
declare void @foo1(...)
diff --git a/test/CodeGen/X86/pr23603.ll b/test/CodeGen/X86/pr23603.ll
new file mode 100644
index 0000000..6f856ae
--- /dev/null
+++ b/test/CodeGen/X86/pr23603.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+declare void @free_v()
+define void @f(i32* %x, i32 %c32, i32* %y) {
+ entry:
+ %v = load i32, i32* %x, !invariant.load !0
+; CHECK: movl (%rdi), %ebx
+; CHECK: free_v
+; CHECK-NOT: movl (%rdi), %ebx
+ call void @free_v()
+ %c = icmp ne i32 %c32, 0
+ br i1 %c, label %left, label %merge
+ left:
+ store i32 %v, i32* %y
+ br label %merge
+ merge:
+ ret void
+!0 = !{}
diff --git a/test/CodeGen/X86/pr23664.ll b/test/CodeGen/X86/pr23664.ll
new file mode 100644
index 0000000..a501c0d
--- /dev/null
+++ b/test/CodeGen/X86/pr23664.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+define i2 @f(i32 %arg) {
+ %trunc = trunc i32 %arg to i1
+ %sext = sext i1 %trunc to i2
+ %or = or i2 %sext, 1
+ ret i2 %or
+; CHECK: addb %dil, %dil
+; CHECK-NEXT: orb $1, %dil
+; CHECK-NEXT: movb %dil, %al
+; CHECK-NEXT: retq
diff --git a/test/CodeGen/X86/recip-fastmath.ll b/test/CodeGen/X86/recip-fastmath.ll
index fcd0770..7f1521a 100644
--- a/test/CodeGen/X86/recip-fastmath.ll
+++ b/test/CodeGen/X86/recip-fastmath.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-recip-est | FileCheck %s --check-prefix=RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-recip-est -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf,vec-divf | FileCheck %s --check-prefix=RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf:2,vec-divf:2 | FileCheck %s --check-prefix=REFINE
; If the target's divss/divps instructions are substantially
; slower than rcpss/rcpps with a Newton-Raphson refinement,
diff --git a/test/CodeGen/X86/sibcall-win64.ll b/test/CodeGen/X86/sibcall-win64.ll
index f703872..204e1f8 100644
--- a/test/CodeGen/X86/sibcall-win64.ll
+++ b/test/CodeGen/X86/sibcall-win64.ll
@@ -1,7 +1,11 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
declare x86_64_win64cc void @win64_callee(i32)
+declare x86_64_win64cc void (i32)* @win64_indirect()
+declare x86_64_win64cc void @win64_other(i32)
declare void @sysv_callee(i32)
+declare void (i32)* @sysv_indirect()
+declare void @sysv_other(i32)
define void @sysv_caller(i32 %p1) {
@@ -40,3 +44,23 @@ define x86_64_win64cc void @win64_matched(i32 %p1) {
; CHECK-LABEL: win64_matched:
; CHECK: jmp win64_callee # TAILCALL
+define x86_64_win64cc void @win64_indirect_caller(i32 %p1) {
+ %1 = call x86_64_win64cc void (i32)* @win64_indirect()
+ call x86_64_win64cc void @win64_other(i32 0)
+ tail call x86_64_win64cc void %1(i32 %p1)
+ ret void
+; CHECK-LABEL: win64_indirect_caller:
+; CHECK: jmpq *%{{rax|rcx|rdx|r8|r9|r11}} # TAILCALL
+define void @sysv_indirect_caller(i32 %p1) {
+ %1 = call void (i32)* @sysv_indirect()
+ call void @sysv_other(i32 0)
+ tail call void %1(i32 %p1)
+ ret void
+; CHECK-LABEL: sysv_indirect_caller:
+; CHECK: jmpq *%{{rax|rcx|rdx|rsi|rdi|r8|r9|r11}} # TAILCALL
diff --git a/test/CodeGen/X86/sqrt-fastmath.ll b/test/CodeGen/X86/sqrt-fastmath.ll
index 4c6b521..373fa53 100644
--- a/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/test/CodeGen/X86/sqrt-fastmath.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-sqrt-est | FileCheck %s --check-prefix=ESTIMATE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=sqrtf,vec-sqrtf | FileCheck %s --check-prefix=ESTIMATE
declare double @__sqrt_finite(double) #0
declare float @__sqrtf_finite(float) #0
diff --git a/test/CodeGen/X86/stack-folding-x86_64.ll b/test/CodeGen/X86/stack-folding-x86_64.ll
new file mode 100644
index 0000000..2112279
--- /dev/null
+++ b/test/CodeGen/X86/stack-folding-x86_64.ll
@@ -0,0 +1,51 @@
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+; Stack reload folding tests.
+; By including a nop call with sideeffects we can force a partial register spill of the
+; relevant registers and check that the reload is correctly folded into the instruction.
+;TODO stack_fold_bsf_i16
+declare i16 @llvm.cttz.i16(i16, i1)
+define i32 @stack_fold_bsf_i32(i32 %a0) {
+ ;CHECK-LABEL: stack_fold_bsf_i32
+ ;CHECK: bsfl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
+ %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call i32 @llvm.cttz.i32(i32 %a0, i1 -1)
+ ret i32 %2
+declare i32 @llvm.cttz.i32(i32, i1)
+define i64 @stack_fold_bsf_i64(i64 %a0) {
+ ;CHECK-LABEL: stack_fold_bsf_i64
+ ;CHECK: bsfq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
+ %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call i64 @llvm.cttz.i64(i64 %a0, i1 -1)
+ ret i64 %2
+declare i64 @llvm.cttz.i64(i64, i1)
+;TODO stack_fold_bsr_i16
+declare i16 @llvm.ctlz.i16(i16, i1)
+define i32 @stack_fold_bsr_i32(i32 %a0) {
+ ;CHECK-LABEL: stack_fold_bsr_i32
+ ;CHECK: bsrl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
+ %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call i32 @llvm.ctlz.i32(i32 %a0, i1 -1)
+ ret i32 %2
+declare i32 @llvm.ctlz.i32(i32, i1)
+define i64 @stack_fold_bsr_i64(i64 %a0) {
+ ;CHECK-LABEL: stack_fold_bsr_i64
+ ;CHECK: bsrq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
+ %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call i64 @llvm.ctlz.i64(i64 %a0, i1 -1)
+ ret i64 %2
+declare i64 @llvm.ctlz.i64(i64, i1)
diff --git a/test/CodeGen/X86/statepoint-far-call.ll b/test/CodeGen/X86/statepoint-far-call.ll
new file mode 100644
index 0000000..cd8dd0f
--- /dev/null
+++ b/test/CodeGen/X86/statepoint-far-call.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+; Test to check that Statepoints with X64 far-immediate targets
+; are lowered correctly to an indirect call via a scratch register.
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-win64"
+define void @test_far_call() gc "statepoint-example" {
+; CHECK-LABEL: test_far_call
+; CHECK: pushq %rax
+; CHECK: movabsq $140727162896504, %rax
+; CHECK: callq *%rax
+; CHECK: popq %rax
+; CHECK: retq
+ %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* inttoptr (i64 140727162896504 to void ()*), i32 0, i32 0, i32 0, i32 0)
+ ret void
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/CodeGen/X86/switch-or.ll b/test/CodeGen/X86/switch-or.ll
index 6e6b013..4642acc 100644
--- a/test/CodeGen/X86/switch-or.ll
+++ b/test/CodeGen/X86/switch-or.ll
@@ -1,10 +1,11 @@
; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s
; Check that merging switch cases that differ in one bit works.
+; CHECK-LABEL: test1
; CHECK: orl $2
; CHECK-NEXT: cmpl $6
-define void @foo(i32 %variable) nounwind {
+define void @test1(i32 %variable) nounwind {
switch i32 %variable, label %if.end [
i32 4, label %if.then
@@ -19,4 +20,22 @@ if.end:
ret void
+; CHECK-LABEL: test2
+; CHECK: orl $-2147483648
+; CHECK-NEXT: cmpl $-2147483648
+define void @test2(i32 %variable) nounwind {
+ switch i32 %variable, label %if.end [
+ i32 0, label %if.then
+ i32 -2147483648, label %if.then
+ ]
+ %call = tail call i32 (...) @bar() nounwind
+ ret void
+ ret void
declare i32 @bar(...) nounwind
diff --git a/test/CodeGen/X86/switch.ll b/test/CodeGen/X86/switch.ll
index 66a739c..a4dece6 100644
--- a/test/CodeGen/X86/switch.ll
+++ b/test/CodeGen/X86/switch.ll
@@ -534,3 +534,18 @@ return: ret void
; CHECK-NOT: cmpl
; CHECK: cmpl $99
+define void @pr23738(i4 %x) {
+ switch i4 %x, label %bb0 [
+ i4 0, label %bb1
+ i4 1, label %bb1
+ i4 -5, label %bb1
+ ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+return: ret void
+; Don't assert due to truncating the bitwidth (64) to i4 when checking
+; that the bit-test range fits in a word.
diff --git a/test/CodeGen/X86/tail-call-got.ll b/test/CodeGen/X86/tail-call-got.ll
index 84d561d..20d1a87 100644
--- a/test/CodeGen/X86/tail-call-got.ll
+++ b/test/CodeGen/X86/tail-call-got.ll
@@ -1,12 +1,14 @@
; RUN: llc < %s -relocation-model=pic -mattr=+sse2 | FileCheck %s
+; We used to do tail calls through the GOT for these symbols, but it was
+; disabled due to PR15086.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i386-unknown-freebsd9.0"
define double @test1(double %x) nounwind readnone {
; CHECK-LABEL: test1:
-; CHECK: movl foo@GOT
-; CHECK-NEXT: jmpl
+; CHECK: calll foo@PLT
%1 = tail call double @foo(double %x) nounwind readnone
ret double %1
@@ -15,10 +17,18 @@ declare double @foo(double) readnone
define double @test2(double %x) nounwind readnone {
; CHECK-LABEL: test2:
-; CHECK: movl sin@GOT
-; CHECK-NEXT: jmpl
+; CHECK: calll sin@PLT
%1 = tail call double @sin(double %x) nounwind readnone
ret double %1
declare double @sin(double) readnone
+define double @test3(double %x) nounwind readnone {
+; CHECK-LABEL: test3:
+; CHECK: calll sin2@PLT
+ %1 = tail call double @sin2(double %x) nounwind readnone
+ ret double %1
+declare double @sin2(double) readnone
diff --git a/test/CodeGen/X86/tailcallpic1.ll b/test/CodeGen/X86/tailcallpic1.ll
index ff590a1..ed101fc 100644
--- a/test/CodeGen/X86/tailcallpic1.ll
+++ b/test/CodeGen/X86/tailcallpic1.ll
@@ -1,5 +1,8 @@
; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s
+; This test uses guaranteed TCO so these will be tail calls, despite the early
+; binding issues.
define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
ret i32 %a3
diff --git a/test/CodeGen/X86/tailcallpic3.ll b/test/CodeGen/X86/tailcallpic3.ll
new file mode 100644
index 0000000..edc5805
--- /dev/null
+++ b/test/CodeGen/X86/tailcallpic3.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s
+; While many of these could be tail called, we don't do it because it forces
+; early binding.
+declare void @external()
+define hidden void @tailcallee_hidden() {
+ ret void
+define void @tailcall_hidden() {
+ tail call void @tailcallee_hidden()
+ ret void
+; CHECK: tailcall_hidden:
+; CHECK: jmp tailcallee_hidden
+define internal void @tailcallee_internal() {
+ ret void
+define void @tailcall_internal() {
+ tail call void @tailcallee_internal()
+ ret void
+; CHECK: tailcall_internal:
+; CHECK: jmp tailcallee_internal
+define default void @tailcallee_default() {
+ ret void
+define void @tailcall_default() {
+ tail call void @tailcallee_default()
+ ret void
+; CHECK: tailcall_default:
+; CHECK: calll tailcallee_default@PLT
+define void @tailcallee_default_implicit() {
+ ret void
+define void @tailcall_default_implicit() {
+ tail call void @tailcallee_default_implicit()
+ ret void
+; CHECK: tailcall_default_implicit:
+; CHECK: calll tailcallee_default_implicit@PLT
+define void @tailcall_external() {
+ tail call void @external()
+ ret void
+; CHECK: tailcall_external:
+; CHECK: calll external@PLT
+define void @musttail_external() {
+ musttail call void @external()
+ ret void
+; CHECK: musttail_external:
+; CHECK: movl external@GOT
+; CHECK: jmpl
diff --git a/test/CodeGen/X86/vec_fp_to_int.ll b/test/CodeGen/X86/vec_fp_to_int.ll
index 9f1c7af..3e72212 100644
--- a/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/test/CodeGen/X86/vec_fp_to_int.ll
@@ -239,7 +239,6 @@ define <4 x i64> @fptoui_4vf64(<4 x double> %a) {
; SSE2: # BB#0:
; SSE2-NEXT: movapd %xmm0, %xmm2
; SSE2-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
-; SSE2-NEXT: {{.*#+}} kill: XMM0<def> XMM2<kill>
; SSE2-NEXT: subsd %xmm3, %xmm0
; SSE2-NEXT: cvttsd2si %xmm0, %rcx
; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
@@ -589,7 +588,6 @@ define <8 x i32> @fptoui_8vf32(<8 x float> %a) {
; SSE2-LABEL: fptoui_8vf32:
; SSE2: # BB#0:
; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: {{.*#+}} kill: XMM0<def> XMM2<kill>
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: cvttss2si %xmm0, %rax
; SSE2-NEXT: movd %eax, %xmm0
diff --git a/test/CodeGen/X86/vec_shift8.ll b/test/CodeGen/X86/vec_shift8.ll
new file mode 100644
index 0000000..a32cb30
--- /dev/null
+++ b/test/CodeGen/X86/vec_shift8.ll
@@ -0,0 +1,1016 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
+; Vectorized integer shifts
+define <2 x i64> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind readnone ssp {
+; SSE2: pextrw $7, %xmm0, %eax
+; SSE2-NEXT: pextrw $7, %xmm1, %ecx
+; SSE2-NEXT: shll %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: pextrw $3, %xmm0, %eax
+; SSE2-NEXT: pextrw $3, %xmm1, %ecx
+; SSE2-NEXT: shll %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE2-NEXT: pextrw $5, %xmm0, %eax
+; SSE2-NEXT: pextrw $5, %xmm1, %ecx
+; SSE2-NEXT: shll %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: pextrw $1, %xmm0, %eax
+; SSE2-NEXT: pextrw $1, %xmm1, %ecx
+; SSE2-NEXT: shll %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
+; SSE2-NEXT: pextrw $6, %xmm0, %eax
+; SSE2-NEXT: pextrw $6, %xmm1, %ecx
+; SSE2-NEXT: shll %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: pextrw $2, %xmm0, %eax
+; SSE2-NEXT: pextrw $2, %xmm1, %ecx
+; SSE2-NEXT: shll %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE2-NEXT: pextrw $4, %xmm0, %eax
+; SSE2-NEXT: pextrw $4, %xmm1, %ecx
+; SSE2-NEXT: shll %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: movd %xmm1, %ecx
+; SSE2-NEXT: shll %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: retq
+; SSE41: pextrw $1, %xmm0, %eax
+; SSE41-NEXT: pextrw $1, %xmm1, %ecx
+; SSE41-NEXT: shll %cl, %eax
+; SSE41-NEXT: movd %xmm0, %edx
+; SSE41-NEXT: movd %xmm1, %ecx
+; SSE41-NEXT: shll %cl, %edx
+; SSE41-NEXT: movd %edx, %xmm2
+; SSE41-NEXT: pinsrw $1, %eax, %xmm2
+; SSE41-NEXT: pextrw $2, %xmm0, %eax
+; SSE41-NEXT: pextrw $2, %xmm1, %ecx
+; SSE41-NEXT: shll %cl, %eax
+; SSE41-NEXT: pinsrw $2, %eax, %xmm2
+; SSE41-NEXT: pextrw $3, %xmm0, %eax
+; SSE41-NEXT: pextrw $3, %xmm1, %ecx
+; SSE41-NEXT: shll %cl, %eax
+; SSE41-NEXT: pinsrw $3, %eax, %xmm2
+; SSE41-NEXT: pextrw $4, %xmm0, %eax
+; SSE41-NEXT: pextrw $4, %xmm1, %ecx
+; SSE41-NEXT: shll %cl, %eax
+; SSE41-NEXT: pinsrw $4, %eax, %xmm2
+; SSE41-NEXT: pextrw $5, %xmm0, %eax
+; SSE41-NEXT: pextrw $5, %xmm1, %ecx
+; SSE41-NEXT: shll %cl, %eax
+; SSE41-NEXT: pinsrw $5, %eax, %xmm2
+; SSE41-NEXT: pextrw $6, %xmm0, %eax
+; SSE41-NEXT: pextrw $6, %xmm1, %ecx
+; SSE41-NEXT: shll %cl, %eax
+; SSE41-NEXT: pinsrw $6, %eax, %xmm2
+; SSE41-NEXT: pextrw $7, %xmm0, %eax
+; SSE41-NEXT: pextrw $7, %xmm1, %ecx
+; SSE41-NEXT: shll %cl, %eax
+; SSE41-NEXT: pinsrw $7, %eax, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+; AVX: vpextrw $1, %xmm0, %eax
+; AVX-NEXT: vpextrw $1, %xmm1, %ecx
+; AVX-NEXT: shll %cl, %eax
+; AVX-NEXT: vmovd %xmm0, %edx
+; AVX-NEXT: vmovd %xmm1, %ecx
+; AVX-NEXT: shll %cl, %edx
+; AVX-NEXT: vmovd %edx, %xmm2
+; AVX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $2, %xmm0, %eax
+; AVX-NEXT: vpextrw $2, %xmm1, %ecx
+; AVX-NEXT: shll %cl, %eax
+; AVX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $3, %xmm0, %eax
+; AVX-NEXT: vpextrw $3, %xmm1, %ecx
+; AVX-NEXT: shll %cl, %eax
+; AVX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $4, %xmm0, %eax
+; AVX-NEXT: vpextrw $4, %xmm1, %ecx
+; AVX-NEXT: shll %cl, %eax
+; AVX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $5, %xmm0, %eax
+; AVX-NEXT: vpextrw $5, %xmm1, %ecx
+; AVX-NEXT: shll %cl, %eax
+; AVX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $6, %xmm0, %eax
+; AVX-NEXT: vpextrw $6, %xmm1, %ecx
+; AVX-NEXT: shll %cl, %eax
+; AVX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $7, %xmm0, %eax
+; AVX-NEXT: vpextrw $7, %xmm1, %ecx
+; AVX-NEXT: shll %cl, %eax
+; AVX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX-NEXT: retq
+ %shl = shl <8 x i16> %r, %a
+ %tmp2 = bitcast <8 x i16> %shl to <2 x i64>
+ ret <2 x i64> %tmp2
+define <2 x i64> @shl_16i8(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp {
+; SSE2: psllw $5, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqb %xmm2, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: psllw $4, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: paddb %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqb %xmm2, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: psllw $2, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: paddb %xmm1, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pcmpeqb %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: paddb %xmm0, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+; SSE41: movdqa %xmm0, %xmm2
+; SSE41-NEXT: psllw $5, %xmm1
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm5
+; SSE41-NEXT: paddb %xmm5, %xmm5
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; SSE41-NEXT: movdqa %xmm3, %xmm4
+; SSE41-NEXT: pand %xmm5, %xmm4
+; SSE41-NEXT: pcmpeqb %xmm3, %xmm4
+; SSE41-NEXT: pand %xmm3, %xmm1
+; SSE41-NEXT: pcmpeqb %xmm3, %xmm1
+; SSE41-NEXT: movdqa %xmm2, %xmm6
+; SSE41-NEXT: psllw $4, %xmm6
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm6
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pblendvb %xmm6, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm1
+; SSE41-NEXT: psllw $2, %xmm1
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movdqa %xmm4, %xmm0
+; SSE41-NEXT: pblendvb %xmm1, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm1
+; SSE41-NEXT: paddb %xmm1, %xmm1
+; SSE41-NEXT: paddb %xmm5, %xmm5
+; SSE41-NEXT: pand %xmm3, %xmm5
+; SSE41-NEXT: pcmpeqb %xmm5, %xmm3
+; SSE41-NEXT: movdqa %xmm3, %xmm0
+; SSE41-NEXT: pblendvb %xmm1, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+; AVX: vpsllw $5, %xmm1, %xmm1
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; AVX-NEXT: vpand %xmm2, %xmm3, %xmm4
+; AVX-NEXT: vpcmpeqb %xmm3, %xmm4, %xmm4
+; AVX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; AVX-NEXT: vpcmpeqb %xmm3, %xmm1, %xmm1
+; AVX-NEXT: vpsllw $4, %xmm0, %xmm5
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm5, %xmm5
+; AVX-NEXT: vpblendvb %xmm1, %xmm5, %xmm0, %xmm0
+; AVX-NEXT: vpsllw $2, %xmm0, %xmm1
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm1
+; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpand %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shl = shl <16 x i8> %r, %a
+ %tmp2 = bitcast <16 x i8> %shl to <2 x i64>
+ ret <2 x i64> %tmp2
+define <2 x i64> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind readnone ssp {
+; SSE2: pextrw $7, %xmm1, %ecx
+; SSE2-NEXT: pextrw $7, %xmm0, %eax
+; SSE2-NEXT: sarw %cl, %ax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: pextrw $3, %xmm1, %ecx
+; SSE2-NEXT: pextrw $3, %xmm0, %eax
+; SSE2-NEXT: sarw %cl, %ax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE2-NEXT: pextrw $5, %xmm1, %ecx
+; SSE2-NEXT: pextrw $5, %xmm0, %eax
+; SSE2-NEXT: sarw %cl, %ax
+; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: pextrw $1, %xmm1, %ecx
+; SSE2-NEXT: pextrw $1, %xmm0, %eax
+; SSE2-NEXT: sarw %cl, %ax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
+; SSE2-NEXT: pextrw $6, %xmm1, %ecx
+; SSE2-NEXT: pextrw $6, %xmm0, %eax
+; SSE2-NEXT: sarw %cl, %ax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: pextrw $2, %xmm1, %ecx
+; SSE2-NEXT: pextrw $2, %xmm0, %eax
+; SSE2-NEXT: sarw %cl, %ax
+; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE2-NEXT: pextrw $4, %xmm1, %ecx
+; SSE2-NEXT: pextrw $4, %xmm0, %eax
+; SSE2-NEXT: sarw %cl, %ax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: movd %xmm1, %ecx
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: sarw %cl, %ax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: retq
+; SSE41: pextrw $1, %xmm1, %ecx
+; SSE41-NEXT: pextrw $1, %xmm0, %eax
+; SSE41-NEXT: sarw %cl, %ax
+; SSE41-NEXT: movd %xmm1, %ecx
+; SSE41-NEXT: movd %xmm0, %edx
+; SSE41-NEXT: sarw %cl, %dx
+; SSE41-NEXT: movd %edx, %xmm2
+; SSE41-NEXT: pinsrw $1, %eax, %xmm2
+; SSE41-NEXT: pextrw $2, %xmm1, %ecx
+; SSE41-NEXT: pextrw $2, %xmm0, %eax
+; SSE41-NEXT: sarw %cl, %ax
+; SSE41-NEXT: pinsrw $2, %eax, %xmm2
+; SSE41-NEXT: pextrw $3, %xmm1, %ecx
+; SSE41-NEXT: pextrw $3, %xmm0, %eax
+; SSE41-NEXT: sarw %cl, %ax
+; SSE41-NEXT: pinsrw $3, %eax, %xmm2
+; SSE41-NEXT: pextrw $4, %xmm1, %ecx
+; SSE41-NEXT: pextrw $4, %xmm0, %eax
+; SSE41-NEXT: sarw %cl, %ax
+; SSE41-NEXT: pinsrw $4, %eax, %xmm2
+; SSE41-NEXT: pextrw $5, %xmm1, %ecx
+; SSE41-NEXT: pextrw $5, %xmm0, %eax
+; SSE41-NEXT: sarw %cl, %ax
+; SSE41-NEXT: pinsrw $5, %eax, %xmm2
+; SSE41-NEXT: pextrw $6, %xmm1, %ecx
+; SSE41-NEXT: pextrw $6, %xmm0, %eax
+; SSE41-NEXT: sarw %cl, %ax
+; SSE41-NEXT: pinsrw $6, %eax, %xmm2
+; SSE41-NEXT: pextrw $7, %xmm1, %ecx
+; SSE41-NEXT: pextrw $7, %xmm0, %eax
+; SSE41-NEXT: sarw %cl, %ax
+; SSE41-NEXT: pinsrw $7, %eax, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+; AVX: vpextrw $1, %xmm1, %ecx
+; AVX-NEXT: vpextrw $1, %xmm0, %eax
+; AVX-NEXT: sarw %cl, %ax
+; AVX-NEXT: vmovd %xmm1, %ecx
+; AVX-NEXT: vmovd %xmm0, %edx
+; AVX-NEXT: sarw %cl, %dx
+; AVX-NEXT: vmovd %edx, %xmm2
+; AVX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $2, %xmm1, %ecx
+; AVX-NEXT: vpextrw $2, %xmm0, %eax
+; AVX-NEXT: sarw %cl, %ax
+; AVX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $3, %xmm1, %ecx
+; AVX-NEXT: vpextrw $3, %xmm0, %eax
+; AVX-NEXT: sarw %cl, %ax
+; AVX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $4, %xmm1, %ecx
+; AVX-NEXT: vpextrw $4, %xmm0, %eax
+; AVX-NEXT: sarw %cl, %ax
+; AVX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $5, %xmm1, %ecx
+; AVX-NEXT: vpextrw $5, %xmm0, %eax
+; AVX-NEXT: sarw %cl, %ax
+; AVX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $6, %xmm1, %ecx
+; AVX-NEXT: vpextrw $6, %xmm0, %eax
+; AVX-NEXT: sarw %cl, %ax
+; AVX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $7, %xmm1, %ecx
+; AVX-NEXT: vpextrw $7, %xmm0, %eax
+; AVX-NEXT: sarw %cl, %ax
+; AVX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX-NEXT: retq
+ %ashr = ashr <8 x i16> %r, %a
+ %tmp2 = bitcast <8 x i16> %ashr to <2 x i64>
+ ret <2 x i64> %tmp2
+define <2 x i64> @ashr_16i8(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp {
+; SSE2: pushq %rbp
+; SSE2-NEXT: pushq %r15
+; SSE2-NEXT: pushq %r14
+; SSE2-NEXT: pushq %r13
+; SSE2-NEXT: pushq %r12
+; SSE2-NEXT: pushq %rbx
+; SSE2-NEXT: movaps %xmm1, -24(%rsp)
+; SSE2-NEXT: movaps %xmm0, -40(%rsp)
+; SSE2-NEXT: movb -9(%rsp), %cl
+; SSE2-NEXT: movb -25(%rsp), %al
+; SSE2-NEXT: sarb %cl, %al
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: movb -17(%rsp), %cl
+; SSE2-NEXT: movb -33(%rsp), %al
+; SSE2-NEXT: sarb %cl, %al
+; SSE2-NEXT: movb -13(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: movl %eax, -44(%rsp)
+; SSE2-NEXT: movb -29(%rsp), %al
+; SSE2-NEXT: sarb %cl, %al
+; SSE2-NEXT: movzbl %al, %r9d
+; SSE2-NEXT: movb -21(%rsp), %cl
+; SSE2-NEXT: movb -37(%rsp), %al
+; SSE2-NEXT: sarb %cl, %al
+; SSE2-NEXT: movb -11(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %r10d
+; SSE2-NEXT: movb -27(%rsp), %al
+; SSE2-NEXT: sarb %cl, %al
+; SSE2-NEXT: movb -19(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %r11d
+; SSE2-NEXT: movb -35(%rsp), %al
+; SSE2-NEXT: sarb %cl, %al
+; SSE2-NEXT: movb -15(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %r14d
+; SSE2-NEXT: movb -31(%rsp), %al
+; SSE2-NEXT: sarb %cl, %al
+; SSE2-NEXT: movzbl %al, %r15d
+; SSE2-NEXT: movb -23(%rsp), %cl
+; SSE2-NEXT: movb -39(%rsp), %al
+; SSE2-NEXT: sarb %cl, %al
+; SSE2-NEXT: movb -10(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %r12d
+; SSE2-NEXT: movb -26(%rsp), %al
+; SSE2-NEXT: sarb %cl, %al
+; SSE2-NEXT: movb -18(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %r13d
+; SSE2-NEXT: movb -34(%rsp), %al
+; SSE2-NEXT: sarb %cl, %al
+; SSE2-NEXT: movb -14(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %r8d
+; SSE2-NEXT: movb -30(%rsp), %al
+; SSE2-NEXT: sarb %cl, %al
+; SSE2-NEXT: movb -22(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %ebp
+; SSE2-NEXT: movb -38(%rsp), %al
+; SSE2-NEXT: sarb %cl, %al
+; SSE2-NEXT: movb -12(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %edi
+; SSE2-NEXT: movb -28(%rsp), %dl
+; SSE2-NEXT: sarb %cl, %dl
+; SSE2-NEXT: movb -20(%rsp), %cl
+; SSE2-NEXT: movzbl %dl, %esi
+; SSE2-NEXT: movb -36(%rsp), %bl
+; SSE2-NEXT: sarb %cl, %bl
+; SSE2-NEXT: movb -16(%rsp), %cl
+; SSE2-NEXT: movzbl %bl, %ebx
+; SSE2-NEXT: movb -32(%rsp), %al
+; SSE2-NEXT: sarb %cl, %al
+; SSE2-NEXT: movzbl %al, %edx
+; SSE2-NEXT: movb -24(%rsp), %cl
+; SSE2-NEXT: movb -40(%rsp), %al
+; SSE2-NEXT: sarb %cl, %al
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: movd -44(%rsp), %xmm1
+; SSE2: movd %r9d, %xmm2
+; SSE2-NEXT: movd %r10d, %xmm3
+; SSE2-NEXT: movd %r11d, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: movd %r14d, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT: movd %r15d, %xmm1
+; SSE2-NEXT: movd %r12d, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSE2-NEXT: movd %r13d, %xmm0
+; SSE2-NEXT: movd %r8d, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: movd %ebp, %xmm0
+; SSE2-NEXT: movd %edi, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSE2-NEXT: movd %esi, %xmm0
+; SSE2-NEXT: movd %ebx, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: movd %edx, %xmm4
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: popq %rbx
+; SSE2-NEXT: popq %r12
+; SSE2-NEXT: popq %r13
+; SSE2-NEXT: popq %r14
+; SSE2-NEXT: popq %r15
+; SSE2-NEXT: popq %rbp
+; SSE2-NEXT: retq
+; SSE41: pextrb $1, %xmm1, %ecx
+; SSE41-NEXT: pextrb $1, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pextrb $0, %xmm1, %ecx
+; SSE41-NEXT: pextrb $0, %xmm0, %edx
+; SSE41-NEXT: sarb %cl, %dl
+; SSE41-NEXT: movzbl %dl, %ecx
+; SSE41-NEXT: movd %ecx, %xmm2
+; SSE41-NEXT: pinsrb $1, %eax, %xmm2
+; SSE41-NEXT: pextrb $2, %xmm1, %ecx
+; SSE41-NEXT: pextrb $2, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $2, %eax, %xmm2
+; SSE41-NEXT: pextrb $3, %xmm1, %ecx
+; SSE41-NEXT: pextrb $3, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $3, %eax, %xmm2
+; SSE41-NEXT: pextrb $4, %xmm1, %ecx
+; SSE41-NEXT: pextrb $4, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $4, %eax, %xmm2
+; SSE41-NEXT: pextrb $5, %xmm1, %ecx
+; SSE41-NEXT: pextrb $5, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $5, %eax, %xmm2
+; SSE41-NEXT: pextrb $6, %xmm1, %ecx
+; SSE41-NEXT: pextrb $6, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $6, %eax, %xmm2
+; SSE41-NEXT: pextrb $7, %xmm1, %ecx
+; SSE41-NEXT: pextrb $7, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $7, %eax, %xmm2
+; SSE41-NEXT: pextrb $8, %xmm1, %ecx
+; SSE41-NEXT: pextrb $8, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $8, %eax, %xmm2
+; SSE41-NEXT: pextrb $9, %xmm1, %ecx
+; SSE41-NEXT: pextrb $9, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $9, %eax, %xmm2
+; SSE41-NEXT: pextrb $10, %xmm1, %ecx
+; SSE41-NEXT: pextrb $10, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $10, %eax, %xmm2
+; SSE41-NEXT: pextrb $11, %xmm1, %ecx
+; SSE41-NEXT: pextrb $11, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $11, %eax, %xmm2
+; SSE41-NEXT: pextrb $12, %xmm1, %ecx
+; SSE41-NEXT: pextrb $12, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $12, %eax, %xmm2
+; SSE41-NEXT: pextrb $13, %xmm1, %ecx
+; SSE41-NEXT: pextrb $13, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $13, %eax, %xmm2
+; SSE41-NEXT: pextrb $14, %xmm1, %ecx
+; SSE41-NEXT: pextrb $14, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $14, %eax, %xmm2
+; SSE41-NEXT: pextrb $15, %xmm1, %ecx
+; SSE41-NEXT: pextrb $15, %xmm0, %eax
+; SSE41-NEXT: sarb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $15, %eax, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+; AVX: vpextrb $1, %xmm1, %ecx
+; AVX-NEXT: vpextrb $1, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpextrb $0, %xmm1, %ecx
+; AVX-NEXT: vpextrb $0, %xmm0, %edx
+; AVX-NEXT: sarb %cl, %dl
+; AVX-NEXT: movzbl %dl, %ecx
+; AVX-NEXT: vmovd %ecx, %xmm2
+; AVX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $2, %xmm1, %ecx
+; AVX-NEXT: vpextrb $2, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $3, %xmm1, %ecx
+; AVX-NEXT: vpextrb $3, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $4, %xmm1, %ecx
+; AVX-NEXT: vpextrb $4, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $5, %xmm1, %ecx
+; AVX-NEXT: vpextrb $5, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $6, %xmm1, %ecx
+; AVX-NEXT: vpextrb $6, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $7, %xmm1, %ecx
+; AVX-NEXT: vpextrb $7, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $8, %xmm1, %ecx
+; AVX-NEXT: vpextrb $8, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $9, %xmm1, %ecx
+; AVX-NEXT: vpextrb $9, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $10, %xmm1, %ecx
+; AVX-NEXT: vpextrb $10, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $11, %xmm1, %ecx
+; AVX-NEXT: vpextrb $11, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $12, %xmm1, %ecx
+; AVX-NEXT: vpextrb $12, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $13, %xmm1, %ecx
+; AVX-NEXT: vpextrb $13, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $14, %xmm1, %ecx
+; AVX-NEXT: vpextrb $14, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $15, %xmm1, %ecx
+; AVX-NEXT: vpextrb $15, %xmm0, %eax
+; AVX-NEXT: sarb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
+; AVX-NEXT: retq
+ %ashr = ashr <16 x i8> %r, %a
+ %tmp2 = bitcast <16 x i8> %ashr to <2 x i64>
+ ret <2 x i64> %tmp2
+define <2 x i64> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind readnone ssp {
+; SSE2: pextrw $7, %xmm0, %eax
+; SSE2-NEXT: pextrw $7, %xmm1, %ecx
+; SSE2-NEXT: shrl %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: pextrw $3, %xmm0, %eax
+; SSE2-NEXT: pextrw $3, %xmm1, %ecx
+; SSE2-NEXT: shrl %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE2-NEXT: pextrw $5, %xmm0, %eax
+; SSE2-NEXT: pextrw $5, %xmm1, %ecx
+; SSE2-NEXT: shrl %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: pextrw $1, %xmm0, %eax
+; SSE2-NEXT: pextrw $1, %xmm1, %ecx
+; SSE2-NEXT: shrl %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
+; SSE2-NEXT: pextrw $6, %xmm0, %eax
+; SSE2-NEXT: pextrw $6, %xmm1, %ecx
+; SSE2-NEXT: shrl %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: pextrw $2, %xmm0, %eax
+; SSE2-NEXT: pextrw $2, %xmm1, %ecx
+; SSE2-NEXT: shrl %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE2-NEXT: pextrw $4, %xmm0, %eax
+; SSE2-NEXT: pextrw $4, %xmm1, %ecx
+; SSE2-NEXT: shrl %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: movd %xmm1, %ecx
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: movzwl %ax, %eax
+; SSE2-NEXT: shrl %cl, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: retq
+; SSE41: pextrw $1, %xmm0, %eax
+; SSE41-NEXT: pextrw $1, %xmm1, %ecx
+; SSE41-NEXT: shrl %cl, %eax
+; SSE41-NEXT: movd %xmm1, %ecx
+; SSE41-NEXT: movd %xmm0, %edx
+; SSE41-NEXT: movzwl %dx, %edx
+; SSE41-NEXT: shrl %cl, %edx
+; SSE41-NEXT: movd %edx, %xmm2
+; SSE41-NEXT: pinsrw $1, %eax, %xmm2
+; SSE41-NEXT: pextrw $2, %xmm0, %eax
+; SSE41-NEXT: pextrw $2, %xmm1, %ecx
+; SSE41-NEXT: shrl %cl, %eax
+; SSE41-NEXT: pinsrw $2, %eax, %xmm2
+; SSE41-NEXT: pextrw $3, %xmm0, %eax
+; SSE41-NEXT: pextrw $3, %xmm1, %ecx
+; SSE41-NEXT: shrl %cl, %eax
+; SSE41-NEXT: pinsrw $3, %eax, %xmm2
+; SSE41-NEXT: pextrw $4, %xmm0, %eax
+; SSE41-NEXT: pextrw $4, %xmm1, %ecx
+; SSE41-NEXT: shrl %cl, %eax
+; SSE41-NEXT: pinsrw $4, %eax, %xmm2
+; SSE41-NEXT: pextrw $5, %xmm0, %eax
+; SSE41-NEXT: pextrw $5, %xmm1, %ecx
+; SSE41-NEXT: shrl %cl, %eax
+; SSE41-NEXT: pinsrw $5, %eax, %xmm2
+; SSE41-NEXT: pextrw $6, %xmm0, %eax
+; SSE41-NEXT: pextrw $6, %xmm1, %ecx
+; SSE41-NEXT: shrl %cl, %eax
+; SSE41-NEXT: pinsrw $6, %eax, %xmm2
+; SSE41-NEXT: pextrw $7, %xmm0, %eax
+; SSE41-NEXT: pextrw $7, %xmm1, %ecx
+; SSE41-NEXT: shrl %cl, %eax
+; SSE41-NEXT: pinsrw $7, %eax, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+; AVX: vpextrw $1, %xmm0, %eax
+; AVX-NEXT: vpextrw $1, %xmm1, %ecx
+; AVX-NEXT: shrl %cl, %eax
+; AVX-NEXT: vmovd %xmm1, %ecx
+; AVX-NEXT: vmovd %xmm0, %edx
+; AVX-NEXT: movzwl %dx, %edx
+; AVX-NEXT: shrl %cl, %edx
+; AVX-NEXT: vmovd %edx, %xmm2
+; AVX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $2, %xmm0, %eax
+; AVX-NEXT: vpextrw $2, %xmm1, %ecx
+; AVX-NEXT: shrl %cl, %eax
+; AVX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $3, %xmm0, %eax
+; AVX-NEXT: vpextrw $3, %xmm1, %ecx
+; AVX-NEXT: shrl %cl, %eax
+; AVX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $4, %xmm0, %eax
+; AVX-NEXT: vpextrw $4, %xmm1, %ecx
+; AVX-NEXT: shrl %cl, %eax
+; AVX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $5, %xmm0, %eax
+; AVX-NEXT: vpextrw $5, %xmm1, %ecx
+; AVX-NEXT: shrl %cl, %eax
+; AVX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $6, %xmm0, %eax
+; AVX-NEXT: vpextrw $6, %xmm1, %ecx
+; AVX-NEXT: shrl %cl, %eax
+; AVX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrw $7, %xmm0, %eax
+; AVX-NEXT: vpextrw $7, %xmm1, %ecx
+; AVX-NEXT: shrl %cl, %eax
+; AVX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX-NEXT: retq
+ %lshr = lshr <8 x i16> %r, %a
+ %tmp2 = bitcast <8 x i16> %lshr to <2 x i64>
+ ret <2 x i64> %tmp2
+define <2 x i64> @lshr_16i8(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp {
+; SSE2: pushq %rbp
+; SSE2-NEXT: pushq %r15
+; SSE2-NEXT: pushq %r14
+; SSE2-NEXT: pushq %r13
+; SSE2-NEXT: pushq %r12
+; SSE2-NEXT: pushq %rbx
+; SSE2-NEXT: movaps %xmm1, -24(%rsp)
+; SSE2-NEXT: movaps %xmm0, -40(%rsp)
+; SSE2-NEXT: movb -9(%rsp), %cl
+; SSE2-NEXT: movb -25(%rsp), %al
+; SSE2-NEXT: shrb %cl, %al
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: movb -17(%rsp), %cl
+; SSE2-NEXT: movb -33(%rsp), %al
+; SSE2-NEXT: shrb %cl, %al
+; SSE2-NEXT: movb -13(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: movl %eax, -44(%rsp)
+; SSE2-NEXT: movb -29(%rsp), %al
+; SSE2-NEXT: shrb %cl, %al
+; SSE2-NEXT: movzbl %al, %r9d
+; SSE2-NEXT: movb -21(%rsp), %cl
+; SSE2-NEXT: movb -37(%rsp), %al
+; SSE2-NEXT: shrb %cl, %al
+; SSE2-NEXT: movb -11(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %r10d
+; SSE2-NEXT: movb -27(%rsp), %al
+; SSE2-NEXT: shrb %cl, %al
+; SSE2-NEXT: movb -19(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %r11d
+; SSE2-NEXT: movb -35(%rsp), %al
+; SSE2-NEXT: shrb %cl, %al
+; SSE2-NEXT: movb -15(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %r14d
+; SSE2-NEXT: movb -31(%rsp), %al
+; SSE2-NEXT: shrb %cl, %al
+; SSE2-NEXT: movzbl %al, %r15d
+; SSE2-NEXT: movb -23(%rsp), %cl
+; SSE2-NEXT: movb -39(%rsp), %al
+; SSE2-NEXT: shrb %cl, %al
+; SSE2-NEXT: movb -10(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %r12d
+; SSE2-NEXT: movb -26(%rsp), %al
+; SSE2-NEXT: shrb %cl, %al
+; SSE2-NEXT: movb -18(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %r13d
+; SSE2-NEXT: movb -34(%rsp), %al
+; SSE2-NEXT: shrb %cl, %al
+; SSE2-NEXT: movb -14(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %r8d
+; SSE2-NEXT: movb -30(%rsp), %al
+; SSE2-NEXT: shrb %cl, %al
+; SSE2-NEXT: movb -22(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %ebp
+; SSE2-NEXT: movb -38(%rsp), %al
+; SSE2-NEXT: shrb %cl, %al
+; SSE2-NEXT: movb -12(%rsp), %cl
+; SSE2-NEXT: movzbl %al, %edi
+; SSE2-NEXT: movb -28(%rsp), %dl
+; SSE2-NEXT: shrb %cl, %dl
+; SSE2-NEXT: movb -20(%rsp), %cl
+; SSE2-NEXT: movzbl %dl, %esi
+; SSE2-NEXT: movb -36(%rsp), %bl
+; SSE2-NEXT: shrb %cl, %bl
+; SSE2-NEXT: movb -16(%rsp), %cl
+; SSE2-NEXT: movzbl %bl, %ebx
+; SSE2-NEXT: movb -32(%rsp), %al
+; SSE2-NEXT: shrb %cl, %al
+; SSE2-NEXT: movzbl %al, %edx
+; SSE2-NEXT: movb -24(%rsp), %cl
+; SSE2-NEXT: movb -40(%rsp), %al
+; SSE2-NEXT: shrb %cl, %al
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: movd -44(%rsp), %xmm1
+; SSE2: movd %r9d, %xmm2
+; SSE2-NEXT: movd %r10d, %xmm3
+; SSE2-NEXT: movd %r11d, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: movd %r14d, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT: movd %r15d, %xmm1
+; SSE2-NEXT: movd %r12d, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSE2-NEXT: movd %r13d, %xmm0
+; SSE2-NEXT: movd %r8d, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: movd %ebp, %xmm0
+; SSE2-NEXT: movd %edi, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSE2-NEXT: movd %esi, %xmm0
+; SSE2-NEXT: movd %ebx, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: movd %edx, %xmm4
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: popq %rbx
+; SSE2-NEXT: popq %r12
+; SSE2-NEXT: popq %r13
+; SSE2-NEXT: popq %r14
+; SSE2-NEXT: popq %r15
+; SSE2-NEXT: popq %rbp
+; SSE2-NEXT: retq
+; SSE41: pextrb $1, %xmm1, %ecx
+; SSE41-NEXT: pextrb $1, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pextrb $0, %xmm1, %ecx
+; SSE41-NEXT: pextrb $0, %xmm0, %edx
+; SSE41-NEXT: shrb %cl, %dl
+; SSE41-NEXT: movzbl %dl, %ecx
+; SSE41-NEXT: movd %ecx, %xmm2
+; SSE41-NEXT: pinsrb $1, %eax, %xmm2
+; SSE41-NEXT: pextrb $2, %xmm1, %ecx
+; SSE41-NEXT: pextrb $2, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $2, %eax, %xmm2
+; SSE41-NEXT: pextrb $3, %xmm1, %ecx
+; SSE41-NEXT: pextrb $3, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $3, %eax, %xmm2
+; SSE41-NEXT: pextrb $4, %xmm1, %ecx
+; SSE41-NEXT: pextrb $4, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $4, %eax, %xmm2
+; SSE41-NEXT: pextrb $5, %xmm1, %ecx
+; SSE41-NEXT: pextrb $5, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $5, %eax, %xmm2
+; SSE41-NEXT: pextrb $6, %xmm1, %ecx
+; SSE41-NEXT: pextrb $6, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $6, %eax, %xmm2
+; SSE41-NEXT: pextrb $7, %xmm1, %ecx
+; SSE41-NEXT: pextrb $7, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $7, %eax, %xmm2
+; SSE41-NEXT: pextrb $8, %xmm1, %ecx
+; SSE41-NEXT: pextrb $8, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $8, %eax, %xmm2
+; SSE41-NEXT: pextrb $9, %xmm1, %ecx
+; SSE41-NEXT: pextrb $9, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $9, %eax, %xmm2
+; SSE41-NEXT: pextrb $10, %xmm1, %ecx
+; SSE41-NEXT: pextrb $10, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $10, %eax, %xmm2
+; SSE41-NEXT: pextrb $11, %xmm1, %ecx
+; SSE41-NEXT: pextrb $11, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $11, %eax, %xmm2
+; SSE41-NEXT: pextrb $12, %xmm1, %ecx
+; SSE41-NEXT: pextrb $12, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $12, %eax, %xmm2
+; SSE41-NEXT: pextrb $13, %xmm1, %ecx
+; SSE41-NEXT: pextrb $13, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $13, %eax, %xmm2
+; SSE41-NEXT: pextrb $14, %xmm1, %ecx
+; SSE41-NEXT: pextrb $14, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $14, %eax, %xmm2
+; SSE41-NEXT: pextrb $15, %xmm1, %ecx
+; SSE41-NEXT: pextrb $15, %xmm0, %eax
+; SSE41-NEXT: shrb %cl, %al
+; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pinsrb $15, %eax, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+; AVX: vpextrb $1, %xmm1, %ecx
+; AVX-NEXT: vpextrb $1, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpextrb $0, %xmm1, %ecx
+; AVX-NEXT: vpextrb $0, %xmm0, %edx
+; AVX-NEXT: shrb %cl, %dl
+; AVX-NEXT: movzbl %dl, %ecx
+; AVX-NEXT: vmovd %ecx, %xmm2
+; AVX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $2, %xmm1, %ecx
+; AVX-NEXT: vpextrb $2, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $3, %xmm1, %ecx
+; AVX-NEXT: vpextrb $3, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $4, %xmm1, %ecx
+; AVX-NEXT: vpextrb $4, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $5, %xmm1, %ecx
+; AVX-NEXT: vpextrb $5, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $6, %xmm1, %ecx
+; AVX-NEXT: vpextrb $6, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $7, %xmm1, %ecx
+; AVX-NEXT: vpextrb $7, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $8, %xmm1, %ecx
+; AVX-NEXT: vpextrb $8, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $9, %xmm1, %ecx
+; AVX-NEXT: vpextrb $9, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $10, %xmm1, %ecx
+; AVX-NEXT: vpextrb $10, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $11, %xmm1, %ecx
+; AVX-NEXT: vpextrb $11, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $12, %xmm1, %ecx
+; AVX-NEXT: vpextrb $12, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $13, %xmm1, %ecx
+; AVX-NEXT: vpextrb $13, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $14, %xmm1, %ecx
+; AVX-NEXT: vpextrb $14, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $15, %xmm1, %ecx
+; AVX-NEXT: vpextrb $15, %xmm0, %eax
+; AVX-NEXT: shrb %cl, %al
+; AVX-NEXT: movzbl %al, %eax
+; AVX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
+; AVX-NEXT: retq
+ %lshr = lshr <16 x i8> %r, %a
+ %tmp2 = bitcast <16 x i8> %lshr to <2 x i64>
+ ret <2 x i64> %tmp2
diff --git a/test/CodeGen/X86/vector-ctpop.ll b/test/CodeGen/X86/vector-ctpop.ll
deleted file mode 100644
index 59d6792..0000000
--- a/test/CodeGen/X86/vector-ctpop.ll
+++ /dev/null
@@ -1,159 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck -check-prefix=AVX2 %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx -mattr=-popcnt | FileCheck -check-prefix=AVX1-NOPOPCNT %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 -mattr=-popcnt | FileCheck -check-prefix=AVX2-NOPOPCNT %s
-; Vector version of:
-; v = v - ((v >> 1) & 0x55555555)
-; v = (v & 0x33333333) + ((v >> 2) & 0x33333333)
-; v = (v + (v >> 4) & 0xF0F0F0F)
-; v = v + (v >> 8)
-; v = v + (v >> 16)
-; v = v + (v >> 32) ; i64 only
-define <8 x i32> @test0(<8 x i32> %x) {
-; AVX2-LABEL: @test0
-; AVX2: vpsrld $1, %ymm
-; AVX2-NEXT: vpbroadcastd
-; AVX2-NEXT: vpand
-; AVX2-NEXT: vpsubd
-; AVX2-NEXT: vpbroadcastd
-; AVX2-NEXT: vpand
-; AVX2-NEXT: vpsrld $2
-; AVX2-NEXT: vpand
-; AVX2-NEXT: vpaddd
-; AVX2-NEXT: vpsrld $4
-; AVX2-NEXT: vpaddd
-; AVX2-NEXT: vpbroadcastd
-; AVX2-NEXT: vpand
-; AVX2-NEXT: vpsrld $8
-; AVX2-NEXT: vpaddd
-; AVX2-NEXT: vpsrld $16
-; AVX2-NEXT: vpaddd
-; AVX2-NEXT: vpbroadcastd
-; AVX2-NEXT: vpand
- %y = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %x)
- ret <8 x i32> %y
-define <4 x i64> @test1(<4 x i64> %x) {
-; AVX2-NOPOPCNT: vpsrlq $1, %ymm
-; AVX2-NOPOPCNT-NEXT: vpbroadcastq
-; AVX2-NOPOPCNT-NEXT: vpbroadcastq
-; AVX2-NOPOPCNT-NEXT: vpsrlq $2
-; AVX2-NOPOPCNT-NEXT: vpsrlq $4
-; AVX2-NOPOPCNT-NEXT: vpbroadcastq
-; AVX2-NOPOPCNT-NEXT: vpsrlq $8
-; AVX2-NOPOPCNT-NEXT: vpsrlq $16
-; AVX2-NOPOPCNT-NEXT: vpsrlq $32
-; AVX2-NOPOPCNT-NEXT: vpbroadcastq
- %y = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
- ret <4 x i64> %y
-define <4 x i32> @test2(<4 x i32> %x) {
-; AVX2-NOPOPCNT: vpsrld $1, %xmm
-; AVX2-NOPOPCNT-NEXT: vpbroadcastd
-; AVX2-NOPOPCNT-NEXT: vpbroadcastd
-; AVX2-NOPOPCNT-NEXT: vpsrld $2
-; AVX2-NOPOPCNT-NEXT: vpsrld $4
-; AVX2-NOPOPCNT-NEXT: vpbroadcastd
-; AVX2-NOPOPCNT-NEXT: vpsrld $8
-; AVX2-NOPOPCNT-NEXT: vpsrld $16
-; AVX2-NOPOPCNT-NEXT: vpbroadcastd
-; AVX1-NOPOPCNT: vpsrld $1, %xmm
-; AVX1-NOPOPCNT-NEXT: vmovdqa
-; AVX1-NOPOPCNT-NEXT: vpsrld $2
-; AVX1-NOPOPCNT-NEXT: vpsrld $4
-; AVX1-NOPOPCNT-NEXT: vpsrld $8
-; AVX1-NOPOPCNT-NEXT: vpsrld $16
- %y = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
- ret <4 x i32> %y
-define <2 x i64> @test3(<2 x i64> %x) {
-; AVX2-NOPOPCNT: vpsrlq $1, %xmm
-; AVX2-NOPOPCNT-NEXT: vmovdqa
-; AVX2-NOPOPCNT-NEXT: vpsrlq $2
-; AVX2-NOPOPCNT-NEXT: vpsrlq $4
-; AVX2-NOPOPCNT-NEXT: vpsrlq $8
-; AVX2-NOPOPCNT-NEXT: vpsrlq $16
-; AVX2-NOPOPCNT-NEXT: vpsrlq $32
-; AVX1-NOPOPCNT: vpsrlq $1, %xmm
-; AVX1-NOPOPCNT-NEXT: vmovdqa
-; AVX1-NOPOPCNT-NEXT: vpsrlq $2
-; AVX1-NOPOPCNT-NEXT: vpsrlq $4
-; AVX1-NOPOPCNT-NEXT: vpsrlq $8
-; AVX1-NOPOPCNT-NEXT: vpsrlq $16
-; AVX1-NOPOPCNT-NEXT: vpsrlq $32
- %y = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %x)
- ret <2 x i64> %y
-declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
-declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
-declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
-declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
diff --git a/test/CodeGen/X86/vector-lzcnt-128.ll b/test/CodeGen/X86/vector-lzcnt-128.ll
new file mode 100644
index 0000000..b43188b
--- /dev/null
+++ b/test/CodeGen/X86/vector-lzcnt-128.ll
@@ -0,0 +1,1915 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+target triple = "x86_64-unknown-unknown"
+define <2 x i64> @testv2i64(<2 x i64> %in) {
+; SSE2-LABEL: testv2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: bsrq %rax, %rax
+; SSE2-NEXT: movl $127, %ecx
+; SSE2-NEXT: cmoveq %rcx, %rax
+; SSE2-NEXT: xorq $63, %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: pshufd $78, %xmm0, %xmm0 # xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: bsrq %rax, %rax
+; SSE2-NEXT: cmoveq %rcx, %rax
+; SSE2-NEXT: xorq $63, %rax
+; SSE2-NEXT: movd %rax, %xmm0
+; SSE2-NEXT: punpcklqdq %xmm0, %xmm1 # xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv2i64:
+; SSE3: # BB#0:
+; SSE3-NEXT: movd %xmm0, %rax
+; SSE3-NEXT: bsrq %rax, %rax
+; SSE3-NEXT: movl $127, %ecx
+; SSE3-NEXT: cmoveq %rcx, %rax
+; SSE3-NEXT: xorq $63, %rax
+; SSE3-NEXT: movd %rax, %xmm1
+; SSE3-NEXT: pshufd $78, %xmm0, %xmm0 # xmm0 = xmm0[2,3,0,1]
+; SSE3-NEXT: movd %xmm0, %rax
+; SSE3-NEXT: bsrq %rax, %rax
+; SSE3-NEXT: cmoveq %rcx, %rax
+; SSE3-NEXT: xorq $63, %rax
+; SSE3-NEXT: movd %rax, %xmm0
+; SSE3-NEXT: punpcklqdq %xmm0, %xmm1 # xmm1 = xmm1[0],xmm0[0]
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv2i64:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %xmm0, %rax
+; SSSE3-NEXT: bsrq %rax, %rax
+; SSSE3-NEXT: movl $127, %ecx
+; SSSE3-NEXT: cmoveq %rcx, %rax
+; SSSE3-NEXT: xorq $63, %rax
+; SSSE3-NEXT: movd %rax, %xmm1
+; SSSE3-NEXT: pshufd $78, %xmm0, %xmm0 # xmm0 = xmm0[2,3,0,1]
+; SSSE3-NEXT: movd %xmm0, %rax
+; SSSE3-NEXT: bsrq %rax, %rax
+; SSSE3-NEXT: cmoveq %rcx, %rax
+; SSSE3-NEXT: xorq $63, %rax
+; SSSE3-NEXT: movd %rax, %xmm0
+; SSSE3-NEXT: punpcklqdq %xmm0, %xmm1 # xmm1 = xmm1[0],xmm0[0]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrq $1, %xmm0, %rax
+; SSE41-NEXT: bsrq %rax, %rax
+; SSE41-NEXT: movl $127, %ecx
+; SSE41-NEXT: cmoveq %rcx, %rax
+; SSE41-NEXT: xorq $63, %rax
+; SSE41-NEXT: movd %rax, %xmm1
+; SSE41-NEXT: movd %xmm0, %rax
+; SSE41-NEXT: bsrq %rax, %rax
+; SSE41-NEXT: cmoveq %rcx, %rax
+; SSE41-NEXT: xorq $63, %rax
+; SSE41-NEXT: movd %rax, %xmm0
+; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: retq
+; AVX-LABEL: testv2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: bsrq %rax, %rax
+; AVX-NEXT: movl $127, %ecx
+; AVX-NEXT: cmoveq %rcx, %rax
+; AVX-NEXT: xorq $63, %rax
+; AVX-NEXT: vmovq %rax, %xmm1
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: bsrq %rax, %rax
+; AVX-NEXT: cmoveq %rcx, %rax
+; AVX-NEXT: xorq $63, %rax
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+ %out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %in, i1 0)
+ ret <2 x i64> %out
+define <2 x i64> @testv2i64u(<2 x i64> %in) {
+; SSE2-LABEL: testv2i64u:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: bsrq %rax, %rax
+; SSE2-NEXT: xorq $63, %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: bsrq %rax, %rax
+; SSE2-NEXT: xorq $63, %rax
+; SSE2-NEXT: movd %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv2i64u:
+; SSE3: # BB#0:
+; SSE3-NEXT: movd %xmm0, %rax
+; SSE3-NEXT: bsrq %rax, %rax
+; SSE3-NEXT: xorq $63, %rax
+; SSE3-NEXT: movd %rax, %xmm1
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE3-NEXT: movd %xmm0, %rax
+; SSE3-NEXT: bsrq %rax, %rax
+; SSE3-NEXT: xorq $63, %rax
+; SSE3-NEXT: movd %rax, %xmm0
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv2i64u:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %xmm0, %rax
+; SSSE3-NEXT: bsrq %rax, %rax
+; SSSE3-NEXT: xorq $63, %rax
+; SSSE3-NEXT: movd %rax, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSSE3-NEXT: movd %xmm0, %rax
+; SSSE3-NEXT: bsrq %rax, %rax
+; SSSE3-NEXT: xorq $63, %rax
+; SSSE3-NEXT: movd %rax, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv2i64u:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrq $1, %xmm0, %rax
+; SSE41-NEXT: bsrq %rax, %rax
+; SSE41-NEXT: xorq $63, %rax
+; SSE41-NEXT: movd %rax, %xmm1
+; SSE41-NEXT: movd %xmm0, %rax
+; SSE41-NEXT: bsrq %rax, %rax
+; SSE41-NEXT: xorq $63, %rax
+; SSE41-NEXT: movd %rax, %xmm0
+; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: retq
+; AVX-LABEL: testv2i64u:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: bsrq %rax, %rax
+; AVX-NEXT: xorq $63, %rax
+; AVX-NEXT: vmovq %rax, %xmm1
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: bsrq %rax, %rax
+; AVX-NEXT: xorq $63, %rax
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+ %out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %in, i1 -1)
+ ret <2 x i64> %out
+define <4 x i32> @testv4i32(<4 x i32> %in) {
+; SSE2-LABEL: testv4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: bsrl %eax, %eax
+; SSE2-NEXT: movl $63, %ecx
+; SSE2-NEXT: cmovel %ecx, %eax
+; SSE2-NEXT: xorl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; SSE2-NEXT: movd %xmm2, %eax
+; SSE2-NEXT: bsrl %eax, %eax
+; SSE2-NEXT: cmovel %ecx, %eax
+; SSE2-NEXT: xorl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: bsrl %eax, %eax
+; SSE2-NEXT: cmovel %ecx, %eax
+; SSE2-NEXT: xorl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: bsrl %eax, %eax
+; SSE2-NEXT: cmovel %ecx, %eax
+; SSE2-NEXT: xorl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv4i32:
+; SSE3: # BB#0:
+; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; SSE3-NEXT: movd %xmm1, %eax
+; SSE3-NEXT: bsrl %eax, %eax
+; SSE3-NEXT: movl $63, %ecx
+; SSE3-NEXT: cmovel %ecx, %eax
+; SSE3-NEXT: xorl $31, %eax
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; SSE3-NEXT: movd %xmm2, %eax
+; SSE3-NEXT: bsrl %eax, %eax
+; SSE3-NEXT: cmovel %ecx, %eax
+; SSE3-NEXT: xorl $31, %eax
+; SSE3-NEXT: movd %eax, %xmm2
+; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: bsrl %eax, %eax
+; SSE3-NEXT: cmovel %ecx, %eax
+; SSE3-NEXT: xorl $31, %eax
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: bsrl %eax, %eax
+; SSE3-NEXT: cmovel %ecx, %eax
+; SSE3-NEXT: xorl $31, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv4i32:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; SSSE3-NEXT: movd %xmm1, %eax
+; SSSE3-NEXT: bsrl %eax, %eax
+; SSSE3-NEXT: movl $63, %ecx
+; SSSE3-NEXT: cmovel %ecx, %eax
+; SSSE3-NEXT: xorl $31, %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; SSSE3-NEXT: movd %xmm2, %eax
+; SSSE3-NEXT: bsrl %eax, %eax
+; SSSE3-NEXT: cmovel %ecx, %eax
+; SSSE3-NEXT: xorl $31, %eax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: movd %xmm0, %eax
+; SSSE3-NEXT: bsrl %eax, %eax
+; SSSE3-NEXT: cmovel %ecx, %eax
+; SSSE3-NEXT: xorl $31, %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSSE3-NEXT: movd %xmm0, %eax
+; SSSE3-NEXT: bsrl %eax, %eax
+; SSSE3-NEXT: cmovel %ecx, %eax
+; SSSE3-NEXT: xorl $31, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: movl $63, %ecx
+; SSE41-NEXT: cmovel %ecx, %eax
+; SSE41-NEXT: xorl $31, %eax
+; SSE41-NEXT: movd %xmm0, %edx
+; SSE41-NEXT: bsrl %edx, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: xorl $31, %edx
+; SSE41-NEXT: movd %edx, %xmm1
+; SSE41-NEXT: pinsrd $1, %eax, %xmm1
+; SSE41-NEXT: pextrd $2, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: cmovel %ecx, %eax
+; SSE41-NEXT: xorl $31, %eax
+; SSE41-NEXT: pinsrd $2, %eax, %xmm1
+; SSE41-NEXT: pextrd $3, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: cmovel %ecx, %eax
+; SSE41-NEXT: xorl $31, %eax
+; SSE41-NEXT: pinsrd $3, %eax, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrd $1, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: movl $63, %ecx
+; AVX-NEXT: cmovel %ecx, %eax
+; AVX-NEXT: xorl $31, %eax
+; AVX-NEXT: vmovd %xmm0, %edx
+; AVX-NEXT: bsrl %edx, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: xorl $31, %edx
+; AVX-NEXT: vmovd %edx, %xmm1
+; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrd $2, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: cmovel %ecx, %eax
+; AVX-NEXT: xorl $31, %eax
+; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrd $3, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: cmovel %ecx, %eax
+; AVX-NEXT: xorl $31, %eax
+; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %in, i1 0)
+ ret <4 x i32> %out
+define <4 x i32> @testv4i32u(<4 x i32> %in) {
+; SSE2-LABEL: testv4i32u:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: bsrl %eax, %eax
+; SSE2-NEXT: xorl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; SSE2-NEXT: movd %xmm2, %eax
+; SSE2-NEXT: bsrl %eax, %eax
+; SSE2-NEXT: xorl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: bsrl %eax, %eax
+; SSE2-NEXT: xorl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: bsrl %eax, %eax
+; SSE2-NEXT: xorl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv4i32u:
+; SSE3: # BB#0:
+; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; SSE3-NEXT: movd %xmm1, %eax
+; SSE3-NEXT: bsrl %eax, %eax
+; SSE3-NEXT: xorl $31, %eax
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; SSE3-NEXT: movd %xmm2, %eax
+; SSE3-NEXT: bsrl %eax, %eax
+; SSE3-NEXT: xorl $31, %eax
+; SSE3-NEXT: movd %eax, %xmm2
+; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: bsrl %eax, %eax
+; SSE3-NEXT: xorl $31, %eax
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: bsrl %eax, %eax
+; SSE3-NEXT: xorl $31, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv4i32u:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; SSSE3-NEXT: movd %xmm1, %eax
+; SSSE3-NEXT: bsrl %eax, %eax
+; SSSE3-NEXT: xorl $31, %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; SSSE3-NEXT: movd %xmm2, %eax
+; SSSE3-NEXT: bsrl %eax, %eax
+; SSSE3-NEXT: xorl $31, %eax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: movd %xmm0, %eax
+; SSSE3-NEXT: bsrl %eax, %eax
+; SSSE3-NEXT: xorl $31, %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSSE3-NEXT: movd %xmm0, %eax
+; SSSE3-NEXT: bsrl %eax, %eax
+; SSSE3-NEXT: xorl $31, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv4i32u:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $31, %eax
+; SSE41-NEXT: movd %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: xorl $31, %ecx
+; SSE41-NEXT: movd %ecx, %xmm1
+; SSE41-NEXT: pinsrd $1, %eax, %xmm1
+; SSE41-NEXT: pextrd $2, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $31, %eax
+; SSE41-NEXT: pinsrd $2, %eax, %xmm1
+; SSE41-NEXT: pextrd $3, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $31, %eax
+; SSE41-NEXT: pinsrd $3, %eax, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv4i32u:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrd $1, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $31, %eax
+; AVX-NEXT: vmovd %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: xorl $31, %ecx
+; AVX-NEXT: vmovd %ecx, %xmm1
+; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrd $2, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $31, %eax
+; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrd $3, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $31, %eax
+; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %in, i1 -1)
+ ret <4 x i32> %out
+define <8 x i16> @testv8i16(<8 x i16> %in) {
+; SSE2-LABEL: testv8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: pextrw $7, %xmm0, %eax
+; SSE2-NEXT: bsrw %ax, %cx
+; SSE2-NEXT: movw $31, %ax
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: xorl $15, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: pextrw $3, %xmm0, %ecx
+; SSE2-NEXT: bsrw %cx, %cx
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: xorl $15, %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: pextrw $5, %xmm0, %ecx
+; SSE2-NEXT: bsrw %cx, %cx
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: xorl $15, %ecx
+; SSE2-NEXT: movd %ecx, %xmm3
+; SSE2-NEXT: pextrw $1, %xmm0, %ecx
+; SSE2-NEXT: bsrw %cx, %cx
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: xorl $15, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: pextrw $6, %xmm0, %ecx
+; SSE2-NEXT: bsrw %cx, %cx
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: xorl $15, %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: pextrw $2, %xmm0, %ecx
+; SSE2-NEXT: bsrw %cx, %cx
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: xorl $15, %ecx
+; SSE2-NEXT: movd %ecx, %xmm3
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE2-NEXT: pextrw $4, %xmm0, %ecx
+; SSE2-NEXT: bsrw %cx, %cx
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: xorl $15, %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: movd %xmm0, %ecx
+; SSE2-NEXT: bsrw %cx, %cx
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: xorl $15, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv8i16:
+; SSE3: # BB#0:
+; SSE3-NEXT: pextrw $7, %xmm0, %eax
+; SSE3-NEXT: bsrw %ax, %cx
+; SSE3-NEXT: movw $31, %ax
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: xorl $15, %ecx
+; SSE3-NEXT: movd %ecx, %xmm1
+; SSE3-NEXT: pextrw $3, %xmm0, %ecx
+; SSE3-NEXT: bsrw %cx, %cx
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: xorl $15, %ecx
+; SSE3-NEXT: movd %ecx, %xmm2
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE3-NEXT: pextrw $5, %xmm0, %ecx
+; SSE3-NEXT: bsrw %cx, %cx
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: xorl $15, %ecx
+; SSE3-NEXT: movd %ecx, %xmm3
+; SSE3-NEXT: pextrw $1, %xmm0, %ecx
+; SSE3-NEXT: bsrw %cx, %cx
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: xorl $15, %ecx
+; SSE3-NEXT: movd %ecx, %xmm1
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE3-NEXT: pextrw $6, %xmm0, %ecx
+; SSE3-NEXT: bsrw %cx, %cx
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: xorl $15, %ecx
+; SSE3-NEXT: movd %ecx, %xmm2
+; SSE3-NEXT: pextrw $2, %xmm0, %ecx
+; SSE3-NEXT: bsrw %cx, %cx
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: xorl $15, %ecx
+; SSE3-NEXT: movd %ecx, %xmm3
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE3-NEXT: pextrw $4, %xmm0, %ecx
+; SSE3-NEXT: bsrw %cx, %cx
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: xorl $15, %ecx
+; SSE3-NEXT: movd %ecx, %xmm2
+; SSE3-NEXT: movd %xmm0, %ecx
+; SSE3-NEXT: bsrw %cx, %cx
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: xorl $15, %ecx
+; SSE3-NEXT: movd %ecx, %xmm0
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv8i16:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pextrw $7, %xmm0, %eax
+; SSSE3-NEXT: bsrw %ax, %cx
+; SSSE3-NEXT: movw $31, %ax
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: xorl $15, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: pextrw $3, %xmm0, %ecx
+; SSSE3-NEXT: bsrw %cx, %cx
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: xorl $15, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT: pextrw $5, %xmm0, %ecx
+; SSSE3-NEXT: bsrw %cx, %cx
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: xorl $15, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm3
+; SSSE3-NEXT: pextrw $1, %xmm0, %ecx
+; SSSE3-NEXT: bsrw %cx, %cx
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: xorl $15, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: pextrw $6, %xmm0, %ecx
+; SSSE3-NEXT: bsrw %cx, %cx
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: xorl $15, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: pextrw $2, %xmm0, %ecx
+; SSSE3-NEXT: bsrw %cx, %cx
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: xorl $15, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm3
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSSE3-NEXT: pextrw $4, %xmm0, %ecx
+; SSSE3-NEXT: bsrw %cx, %cx
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: xorl $15, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: movd %xmm0, %ecx
+; SSSE3-NEXT: bsrw %cx, %cx
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: xorl $15, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrw $1, %xmm0, %eax
+; SSE41-NEXT: bsrw %ax, %cx
+; SSE41-NEXT: movw $31, %ax
+; SSE41-NEXT: cmovew %ax, %cx
+; SSE41-NEXT: xorl $15, %ecx
+; SSE41-NEXT: movd %xmm0, %edx
+; SSE41-NEXT: bsrw %dx, %dx
+; SSE41-NEXT: cmovew %ax, %dx
+; SSE41-NEXT: xorl $15, %edx
+; SSE41-NEXT: movd %edx, %xmm1
+; SSE41-NEXT: pinsrw $1, %ecx, %xmm1
+; SSE41-NEXT: pextrw $2, %xmm0, %ecx
+; SSE41-NEXT: bsrw %cx, %cx
+; SSE41-NEXT: cmovew %ax, %cx
+; SSE41-NEXT: xorl $15, %ecx
+; SSE41-NEXT: pinsrw $2, %ecx, %xmm1
+; SSE41-NEXT: pextrw $3, %xmm0, %ecx
+; SSE41-NEXT: bsrw %cx, %cx
+; SSE41-NEXT: cmovew %ax, %cx
+; SSE41-NEXT: xorl $15, %ecx
+; SSE41-NEXT: pinsrw $3, %ecx, %xmm1
+; SSE41-NEXT: pextrw $4, %xmm0, %ecx
+; SSE41-NEXT: bsrw %cx, %cx
+; SSE41-NEXT: cmovew %ax, %cx
+; SSE41-NEXT: xorl $15, %ecx
+; SSE41-NEXT: pinsrw $4, %ecx, %xmm1
+; SSE41-NEXT: pextrw $5, %xmm0, %ecx
+; SSE41-NEXT: bsrw %cx, %cx
+; SSE41-NEXT: cmovew %ax, %cx
+; SSE41-NEXT: xorl $15, %ecx
+; SSE41-NEXT: pinsrw $5, %ecx, %xmm1
+; SSE41-NEXT: pextrw $6, %xmm0, %ecx
+; SSE41-NEXT: bsrw %cx, %cx
+; SSE41-NEXT: cmovew %ax, %cx
+; SSE41-NEXT: xorl $15, %ecx
+; SSE41-NEXT: pinsrw $6, %ecx, %xmm1
+; SSE41-NEXT: pextrw $7, %xmm0, %ecx
+; SSE41-NEXT: bsrw %cx, %cx
+; SSE41-NEXT: cmovew %ax, %cx
+; SSE41-NEXT: xorl $15, %ecx
+; SSE41-NEXT: pinsrw $7, %ecx, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrw $1, %xmm0, %eax
+; AVX-NEXT: bsrw %ax, %cx
+; AVX-NEXT: movw $31, %ax
+; AVX-NEXT: cmovew %ax, %cx
+; AVX-NEXT: xorl $15, %ecx
+; AVX-NEXT: vmovd %xmm0, %edx
+; AVX-NEXT: bsrw %dx, %dx
+; AVX-NEXT: cmovew %ax, %dx
+; AVX-NEXT: xorl $15, %edx
+; AVX-NEXT: vmovd %edx, %xmm1
+; AVX-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $2, %xmm0, %ecx
+; AVX-NEXT: bsrw %cx, %cx
+; AVX-NEXT: cmovew %ax, %cx
+; AVX-NEXT: xorl $15, %ecx
+; AVX-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $3, %xmm0, %ecx
+; AVX-NEXT: bsrw %cx, %cx
+; AVX-NEXT: cmovew %ax, %cx
+; AVX-NEXT: xorl $15, %ecx
+; AVX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $4, %xmm0, %ecx
+; AVX-NEXT: bsrw %cx, %cx
+; AVX-NEXT: cmovew %ax, %cx
+; AVX-NEXT: xorl $15, %ecx
+; AVX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $5, %xmm0, %ecx
+; AVX-NEXT: bsrw %cx, %cx
+; AVX-NEXT: cmovew %ax, %cx
+; AVX-NEXT: xorl $15, %ecx
+; AVX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $6, %xmm0, %ecx
+; AVX-NEXT: bsrw %cx, %cx
+; AVX-NEXT: cmovew %ax, %cx
+; AVX-NEXT: xorl $15, %ecx
+; AVX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $7, %xmm0, %ecx
+; AVX-NEXT: bsrw %cx, %cx
+; AVX-NEXT: cmovew %ax, %cx
+; AVX-NEXT: xorl $15, %ecx
+; AVX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %in, i1 0)
+ ret <8 x i16> %out
+define <8 x i16> @testv8i16u(<8 x i16> %in) {
+; SSE2-LABEL: testv8i16u:
+; SSE2: # BB#0:
+; SSE2-NEXT: pextrw $7, %xmm0, %eax
+; SSE2-NEXT: bsrw %ax, %ax
+; SSE2-NEXT: xorl $15, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: pextrw $3, %xmm0, %eax
+; SSE2-NEXT: bsrw %ax, %ax
+; SSE2-NEXT: xorl $15, %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: pextrw $5, %xmm0, %eax
+; SSE2-NEXT: bsrw %ax, %ax
+; SSE2-NEXT: xorl $15, %eax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: pextrw $1, %xmm0, %eax
+; SSE2-NEXT: bsrw %ax, %ax
+; SSE2-NEXT: xorl $15, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: pextrw $6, %xmm0, %eax
+; SSE2-NEXT: bsrw %ax, %ax
+; SSE2-NEXT: xorl $15, %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: pextrw $2, %xmm0, %eax
+; SSE2-NEXT: bsrw %ax, %ax
+; SSE2-NEXT: xorl $15, %eax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE2-NEXT: pextrw $4, %xmm0, %eax
+; SSE2-NEXT: bsrw %ax, %ax
+; SSE2-NEXT: xorl $15, %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: bsrw %ax, %ax
+; SSE2-NEXT: xorl $15, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv8i16u:
+; SSE3: # BB#0:
+; SSE3-NEXT: pextrw $7, %xmm0, %eax
+; SSE3-NEXT: bsrw %ax, %ax
+; SSE3-NEXT: xorl $15, %eax
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: pextrw $3, %xmm0, %eax
+; SSE3-NEXT: bsrw %ax, %ax
+; SSE3-NEXT: xorl $15, %eax
+; SSE3-NEXT: movd %eax, %xmm2
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE3-NEXT: pextrw $5, %xmm0, %eax
+; SSE3-NEXT: bsrw %ax, %ax
+; SSE3-NEXT: xorl $15, %eax
+; SSE3-NEXT: movd %eax, %xmm3
+; SSE3-NEXT: pextrw $1, %xmm0, %eax
+; SSE3-NEXT: bsrw %ax, %ax
+; SSE3-NEXT: xorl $15, %eax
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE3-NEXT: pextrw $6, %xmm0, %eax
+; SSE3-NEXT: bsrw %ax, %ax
+; SSE3-NEXT: xorl $15, %eax
+; SSE3-NEXT: movd %eax, %xmm2
+; SSE3-NEXT: pextrw $2, %xmm0, %eax
+; SSE3-NEXT: bsrw %ax, %ax
+; SSE3-NEXT: xorl $15, %eax
+; SSE3-NEXT: movd %eax, %xmm3
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE3-NEXT: pextrw $4, %xmm0, %eax
+; SSE3-NEXT: bsrw %ax, %ax
+; SSE3-NEXT: xorl $15, %eax
+; SSE3-NEXT: movd %eax, %xmm2
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: bsrw %ax, %ax
+; SSE3-NEXT: xorl $15, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv8i16u:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pextrw $7, %xmm0, %eax
+; SSSE3-NEXT: bsrw %ax, %ax
+; SSSE3-NEXT: xorl $15, %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: pextrw $3, %xmm0, %eax
+; SSSE3-NEXT: bsrw %ax, %ax
+; SSSE3-NEXT: xorl $15, %eax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT: pextrw $5, %xmm0, %eax
+; SSSE3-NEXT: bsrw %ax, %ax
+; SSSE3-NEXT: xorl $15, %eax
+; SSSE3-NEXT: movd %eax, %xmm3
+; SSSE3-NEXT: pextrw $1, %xmm0, %eax
+; SSSE3-NEXT: bsrw %ax, %ax
+; SSSE3-NEXT: xorl $15, %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: pextrw $6, %xmm0, %eax
+; SSSE3-NEXT: bsrw %ax, %ax
+; SSSE3-NEXT: xorl $15, %eax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: pextrw $2, %xmm0, %eax
+; SSSE3-NEXT: bsrw %ax, %ax
+; SSSE3-NEXT: xorl $15, %eax
+; SSSE3-NEXT: movd %eax, %xmm3
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSSE3-NEXT: pextrw $4, %xmm0, %eax
+; SSSE3-NEXT: bsrw %ax, %ax
+; SSSE3-NEXT: xorl $15, %eax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: movd %xmm0, %eax
+; SSSE3-NEXT: bsrw %ax, %ax
+; SSSE3-NEXT: xorl $15, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv8i16u:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrw $1, %xmm0, %eax
+; SSE41-NEXT: bsrw %ax, %ax
+; SSE41-NEXT: xorl $15, %eax
+; SSE41-NEXT: movd %xmm0, %ecx
+; SSE41-NEXT: bsrw %cx, %cx
+; SSE41-NEXT: xorl $15, %ecx
+; SSE41-NEXT: movd %ecx, %xmm1
+; SSE41-NEXT: pinsrw $1, %eax, %xmm1
+; SSE41-NEXT: pextrw $2, %xmm0, %eax
+; SSE41-NEXT: bsrw %ax, %ax
+; SSE41-NEXT: xorl $15, %eax
+; SSE41-NEXT: pinsrw $2, %eax, %xmm1
+; SSE41-NEXT: pextrw $3, %xmm0, %eax
+; SSE41-NEXT: bsrw %ax, %ax
+; SSE41-NEXT: xorl $15, %eax
+; SSE41-NEXT: pinsrw $3, %eax, %xmm1
+; SSE41-NEXT: pextrw $4, %xmm0, %eax
+; SSE41-NEXT: bsrw %ax, %ax
+; SSE41-NEXT: xorl $15, %eax
+; SSE41-NEXT: pinsrw $4, %eax, %xmm1
+; SSE41-NEXT: pextrw $5, %xmm0, %eax
+; SSE41-NEXT: bsrw %ax, %ax
+; SSE41-NEXT: xorl $15, %eax
+; SSE41-NEXT: pinsrw $5, %eax, %xmm1
+; SSE41-NEXT: pextrw $6, %xmm0, %eax
+; SSE41-NEXT: bsrw %ax, %ax
+; SSE41-NEXT: xorl $15, %eax
+; SSE41-NEXT: pinsrw $6, %eax, %xmm1
+; SSE41-NEXT: pextrw $7, %xmm0, %eax
+; SSE41-NEXT: bsrw %ax, %ax
+; SSE41-NEXT: xorl $15, %eax
+; SSE41-NEXT: pinsrw $7, %eax, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv8i16u:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrw $1, %xmm0, %eax
+; AVX-NEXT: bsrw %ax, %ax
+; AVX-NEXT: xorl $15, %eax
+; AVX-NEXT: vmovd %xmm0, %ecx
+; AVX-NEXT: bsrw %cx, %cx
+; AVX-NEXT: xorl $15, %ecx
+; AVX-NEXT: vmovd %ecx, %xmm1
+; AVX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $2, %xmm0, %eax
+; AVX-NEXT: bsrw %ax, %ax
+; AVX-NEXT: xorl $15, %eax
+; AVX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $3, %xmm0, %eax
+; AVX-NEXT: bsrw %ax, %ax
+; AVX-NEXT: xorl $15, %eax
+; AVX-NEXT: vpinsrw $3, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $4, %xmm0, %eax
+; AVX-NEXT: bsrw %ax, %ax
+; AVX-NEXT: xorl $15, %eax
+; AVX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $5, %xmm0, %eax
+; AVX-NEXT: bsrw %ax, %ax
+; AVX-NEXT: xorl $15, %eax
+; AVX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $6, %xmm0, %eax
+; AVX-NEXT: bsrw %ax, %ax
+; AVX-NEXT: xorl $15, %eax
+; AVX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $7, %xmm0, %eax
+; AVX-NEXT: bsrw %ax, %ax
+; AVX-NEXT: xorl $15, %eax
+; AVX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %in, i1 -1)
+ ret <8 x i16> %out
+define <16 x i8> @testv16i8(<16 x i8> %in) {
+; SSE2-LABEL: testv16i8:
+; SSE2: # BB#0:
+; SSE2: pushq %rbp
+; SSE2: movaps %xmm0, -24(%rsp)
+; SSE2-NEXT: movzbl -9(%rsp), %eax
+; SSE2-NEXT: bsrl %eax, %ecx
+; SSE2-NEXT: movl $15, %eax
+; SSE2-NEXT: cmovel %eax, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movzbl -10(%rsp), %ebx
+; SSE2-NEXT: movzbl -11(%rsp), %edi
+; SSE2-NEXT: movzbl -12(%rsp), %r9d
+; SSE2-NEXT: movzbl -13(%rsp), %edx
+; SSE2-NEXT: movzbl -14(%rsp), %r11d
+; SSE2-NEXT: movzbl -15(%rsp), %esi
+; SSE2-NEXT: movzbl -16(%rsp), %r8d
+; SSE2-NEXT: movzbl -17(%rsp), %ecx
+; SSE2-NEXT: bsrl %ecx, %ecx
+; SSE2-NEXT: cmovel %eax, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: bsrl %edx, %ecx
+; SSE2-NEXT: cmovel %eax, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: movzbl -18(%rsp), %edx
+; SSE2-NEXT: movzbl -19(%rsp), %ecx
+; SSE2-NEXT: movzbl -20(%rsp), %r10d
+; SSE2-NEXT: movzbl -21(%rsp), %ebp
+; SSE2-NEXT: bsrl %ebp, %ebp
+; SSE2-NEXT: cmovel %eax, %ebp
+; SSE2-NEXT: xorl $7, %ebp
+; SSE2-NEXT: movd %ebp, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: bsrl %edi, %edi
+; SSE2-NEXT: cmovel %eax, %edi
+; SSE2-NEXT: xorl $7, %edi
+; SSE2-NEXT: movd %edi, %xmm1
+; SSE2-NEXT: bsrl %ecx, %ecx
+; SSE2-NEXT: cmovel %eax, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: bsrl %esi, %ecx
+; SSE2-NEXT: cmovel %eax, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm3
+; SSE2-NEXT: movzbl -22(%rsp), %esi
+; SSE2-NEXT: movzbl -23(%rsp), %ecx
+; SSE2-NEXT: bsrl %ecx, %ecx
+; SSE2-NEXT: cmovel %eax, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: bsrl %ebx, %ecx
+; SSE2-NEXT: cmovel %eax, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: bsrl %edx, %ecx
+; SSE2-NEXT: cmovel %eax, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE2-NEXT: bsrl %r11d, %ecx
+; SSE2-NEXT: cmovel %eax, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: bsrl %esi, %ecx
+; SSE2-NEXT: cmovel %eax, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSE2-NEXT: bsrl %r9d, %ecx
+; SSE2-NEXT: cmovel %eax, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: bsrl %r10d, %ecx
+; SSE2-NEXT: cmovel %eax, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE2-NEXT: bsrl %r8d, %ecx
+; SSE2-NEXT: cmovel %eax, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm4
+; SSE2-NEXT: movzbl -24(%rsp), %ecx
+; SSE2-NEXT: bsrl %ecx, %ecx
+; SSE2-NEXT: cmovel %eax, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: popq %rbx
+; SSE2-NEXT: popq %rbp
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv16i8:
+; SSE3: # BB#0:
+; SSE3: pushq %rbp
+; SSE3: movaps %xmm0, -24(%rsp)
+; SSE3-NEXT: movzbl -9(%rsp), %eax
+; SSE3-NEXT: bsrl %eax, %ecx
+; SSE3-NEXT: movl $15, %eax
+; SSE3-NEXT: cmovel %eax, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm0
+; SSE3-NEXT: movzbl -10(%rsp), %ebx
+; SSE3-NEXT: movzbl -11(%rsp), %edi
+; SSE3-NEXT: movzbl -12(%rsp), %r9d
+; SSE3-NEXT: movzbl -13(%rsp), %edx
+; SSE3-NEXT: movzbl -14(%rsp), %r11d
+; SSE3-NEXT: movzbl -15(%rsp), %esi
+; SSE3-NEXT: movzbl -16(%rsp), %r8d
+; SSE3-NEXT: movzbl -17(%rsp), %ecx
+; SSE3-NEXT: bsrl %ecx, %ecx
+; SSE3-NEXT: cmovel %eax, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm1
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE3-NEXT: bsrl %edx, %ecx
+; SSE3-NEXT: cmovel %eax, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm2
+; SSE3-NEXT: movzbl -18(%rsp), %edx
+; SSE3-NEXT: movzbl -19(%rsp), %ecx
+; SSE3-NEXT: movzbl -20(%rsp), %r10d
+; SSE3-NEXT: movzbl -21(%rsp), %ebp
+; SSE3-NEXT: bsrl %ebp, %ebp
+; SSE3-NEXT: cmovel %eax, %ebp
+; SSE3-NEXT: xorl $7, %ebp
+; SSE3-NEXT: movd %ebp, %xmm0
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE3-NEXT: bsrl %edi, %edi
+; SSE3-NEXT: cmovel %eax, %edi
+; SSE3-NEXT: xorl $7, %edi
+; SSE3-NEXT: movd %edi, %xmm1
+; SSE3-NEXT: bsrl %ecx, %ecx
+; SSE3-NEXT: cmovel %eax, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm2
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE3-NEXT: bsrl %esi, %ecx
+; SSE3-NEXT: cmovel %eax, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm3
+; SSE3-NEXT: movzbl -22(%rsp), %esi
+; SSE3-NEXT: movzbl -23(%rsp), %ecx
+; SSE3-NEXT: bsrl %ecx, %ecx
+; SSE3-NEXT: cmovel %eax, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm1
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE3-NEXT: bsrl %ebx, %ecx
+; SSE3-NEXT: cmovel %eax, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm0
+; SSE3-NEXT: bsrl %edx, %ecx
+; SSE3-NEXT: cmovel %eax, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm3
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE3-NEXT: bsrl %r11d, %ecx
+; SSE3-NEXT: cmovel %eax, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm0
+; SSE3-NEXT: bsrl %esi, %ecx
+; SSE3-NEXT: cmovel %eax, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm2
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSE3-NEXT: bsrl %r9d, %ecx
+; SSE3-NEXT: cmovel %eax, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm0
+; SSE3-NEXT: bsrl %r10d, %ecx
+; SSE3-NEXT: cmovel %eax, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm3
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE3-NEXT: bsrl %r8d, %ecx
+; SSE3-NEXT: cmovel %eax, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm4
+; SSE3-NEXT: movzbl -24(%rsp), %ecx
+; SSE3-NEXT: bsrl %ecx, %ecx
+; SSE3-NEXT: cmovel %eax, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm0
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE3-NEXT: popq %rbx
+; SSE3-NEXT: popq %rbp
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv16i8:
+; SSSE3: # BB#0:
+; SSSE3: pushq %rbp
+; SSSE3: movaps %xmm0, -24(%rsp)
+; SSSE3-NEXT: movzbl -9(%rsp), %eax
+; SSSE3-NEXT: bsrl %eax, %ecx
+; SSSE3-NEXT: movl $15, %eax
+; SSSE3-NEXT: cmovel %eax, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: movzbl -10(%rsp), %ebx
+; SSSE3-NEXT: movzbl -11(%rsp), %edi
+; SSSE3-NEXT: movzbl -12(%rsp), %r9d
+; SSSE3-NEXT: movzbl -13(%rsp), %edx
+; SSSE3-NEXT: movzbl -14(%rsp), %r11d
+; SSSE3-NEXT: movzbl -15(%rsp), %esi
+; SSSE3-NEXT: movzbl -16(%rsp), %r8d
+; SSSE3-NEXT: movzbl -17(%rsp), %ecx
+; SSSE3-NEXT: bsrl %ecx, %ecx
+; SSSE3-NEXT: cmovel %eax, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: bsrl %edx, %ecx
+; SSSE3-NEXT: cmovel %eax, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: movzbl -18(%rsp), %edx
+; SSSE3-NEXT: movzbl -19(%rsp), %ecx
+; SSSE3-NEXT: movzbl -20(%rsp), %r10d
+; SSSE3-NEXT: movzbl -21(%rsp), %ebp
+; SSSE3-NEXT: bsrl %ebp, %ebp
+; SSSE3-NEXT: cmovel %eax, %ebp
+; SSSE3-NEXT: xorl $7, %ebp
+; SSSE3-NEXT: movd %ebp, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT: bsrl %edi, %edi
+; SSSE3-NEXT: cmovel %eax, %edi
+; SSSE3-NEXT: xorl $7, %edi
+; SSSE3-NEXT: movd %edi, %xmm1
+; SSSE3-NEXT: bsrl %ecx, %ecx
+; SSSE3-NEXT: cmovel %eax, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSSE3-NEXT: bsrl %esi, %ecx
+; SSSE3-NEXT: cmovel %eax, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm3
+; SSSE3-NEXT: movzbl -22(%rsp), %esi
+; SSSE3-NEXT: movzbl -23(%rsp), %ecx
+; SSSE3-NEXT: bsrl %ecx, %ecx
+; SSSE3-NEXT: cmovel %eax, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: bsrl %ebx, %ecx
+; SSSE3-NEXT: cmovel %eax, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: bsrl %edx, %ecx
+; SSSE3-NEXT: cmovel %eax, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSSE3-NEXT: bsrl %r11d, %ecx
+; SSSE3-NEXT: cmovel %eax, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: bsrl %esi, %ecx
+; SSSE3-NEXT: cmovel %eax, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSSE3-NEXT: bsrl %r9d, %ecx
+; SSSE3-NEXT: cmovel %eax, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: bsrl %r10d, %ecx
+; SSSE3-NEXT: cmovel %eax, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSSE3-NEXT: bsrl %r8d, %ecx
+; SSSE3-NEXT: cmovel %eax, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm4
+; SSSE3-NEXT: movzbl -24(%rsp), %ecx
+; SSSE3-NEXT: bsrl %ecx, %ecx
+; SSSE3-NEXT: cmovel %eax, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT: popq %rbx
+; SSSE3-NEXT: popq %rbp
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv16i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrb $1, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %ecx
+; SSE41-NEXT: movl $15, %eax
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pextrb $0, %xmm0, %edx
+; SSE41-NEXT: bsrl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: xorl $7, %edx
+; SSE41-NEXT: movd %edx, %xmm1
+; SSE41-NEXT: pinsrb $1, %ecx, %xmm1
+; SSE41-NEXT: pextrb $2, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pinsrb $2, %ecx, %xmm1
+; SSE41-NEXT: pextrb $3, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pinsrb $3, %ecx, %xmm1
+; SSE41-NEXT: pextrb $4, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pinsrb $4, %ecx, %xmm1
+; SSE41-NEXT: pextrb $5, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pinsrb $5, %ecx, %xmm1
+; SSE41-NEXT: pextrb $6, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pinsrb $6, %ecx, %xmm1
+; SSE41-NEXT: pextrb $7, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pinsrb $7, %ecx, %xmm1
+; SSE41-NEXT: pextrb $8, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pinsrb $8, %ecx, %xmm1
+; SSE41-NEXT: pextrb $9, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pinsrb $9, %ecx, %xmm1
+; SSE41-NEXT: pextrb $10, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pinsrb $10, %ecx, %xmm1
+; SSE41-NEXT: pextrb $11, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pinsrb $11, %ecx, %xmm1
+; SSE41-NEXT: pextrb $12, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pinsrb $12, %ecx, %xmm1
+; SSE41-NEXT: pextrb $13, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pinsrb $13, %ecx, %xmm1
+; SSE41-NEXT: pextrb $14, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pinsrb $14, %ecx, %xmm1
+; SSE41-NEXT: pextrb $15, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: cmovel %eax, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: pinsrb $15, %ecx, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrb $1, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %ecx
+; AVX-NEXT: movl $15, %eax
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpextrb $0, %xmm0, %edx
+; AVX-NEXT: bsrl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: xorl $7, %edx
+; AVX-NEXT: vmovd %edx, %xmm1
+; AVX-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $2, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $3, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $4, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $5, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $6, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $7, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $8, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $9, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $10, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $11, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $12, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $13, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $14, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $15, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: cmovel %eax, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vpinsrb $15, %ecx, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %in, i1 0)
+ ret <16 x i8> %out
+define <16 x i8> @testv16i8u(<16 x i8> %in) {
+; SSE2-LABEL: testv16i8u:
+; SSE2: # BB#0:
+; SSE2: pushq %rbx
+; SSE2: movaps %xmm0, -16(%rsp)
+; SSE2-NEXT: movzbl -1(%rsp), %eax
+; SSE2-NEXT: bsrl %eax, %eax
+; SSE2-NEXT: xorl $7, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: movzbl -2(%rsp), %edi
+; SSE2-NEXT: movzbl -3(%rsp), %edx
+; SSE2-NEXT: movzbl -4(%rsp), %r9d
+; SSE2-NEXT: movzbl -5(%rsp), %eax
+; SSE2-NEXT: movzbl -6(%rsp), %r10d
+; SSE2-NEXT: movzbl -7(%rsp), %ecx
+; SSE2-NEXT: movzbl -8(%rsp), %r8d
+; SSE2-NEXT: movzbl -9(%rsp), %esi
+; SSE2-NEXT: bsrl %esi, %esi
+; SSE2-NEXT: xorl $7, %esi
+; SSE2-NEXT: movd %esi, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: bsrl %eax, %eax
+; SSE2-NEXT: xorl $7, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: movzbl -10(%rsp), %eax
+; SSE2-NEXT: movzbl -11(%rsp), %esi
+; SSE2-NEXT: movzbl -12(%rsp), %r11d
+; SSE2-NEXT: movzbl -13(%rsp), %ebx
+; SSE2-NEXT: bsrl %ebx, %ebx
+; SSE2-NEXT: xorl $7, %ebx
+; SSE2-NEXT: movd %ebx, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: bsrl %edx, %edx
+; SSE2-NEXT: xorl $7, %edx
+; SSE2-NEXT: movd %edx, %xmm0
+; SSE2-NEXT: bsrl %esi, %edx
+; SSE2-NEXT: xorl $7, %edx
+; SSE2-NEXT: movd %edx, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE2-NEXT: bsrl %ecx, %ecx
+; SSE2-NEXT: xorl $7, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movzbl -14(%rsp), %ecx
+; SSE2-NEXT: movzbl -15(%rsp), %edx
+; SSE2-NEXT: bsrl %edx, %edx
+; SSE2-NEXT: xorl $7, %edx
+; SSE2-NEXT: movd %edx, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: bsrl %edi, %edx
+; SSE2-NEXT: xorl $7, %edx
+; SSE2-NEXT: movd %edx, %xmm0
+; SSE2-NEXT: bsrl %eax, %eax
+; SSE2-NEXT: xorl $7, %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: bsrl %r10d, %eax
+; SSE2-NEXT: xorl $7, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: bsrl %ecx, %eax
+; SSE2-NEXT: xorl $7, %eax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: bsrl %r9d, %eax
+; SSE2-NEXT: xorl $7, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: bsrl %r11d, %eax
+; SSE2-NEXT: xorl $7, %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: bsrl %r8d, %eax
+; SSE2-NEXT: xorl $7, %eax
+; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: movzbl -16(%rsp), %eax
+; SSE2-NEXT: bsrl %eax, %eax
+; SSE2-NEXT: xorl $7, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: popq %rbx
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv16i8u:
+; SSE3: # BB#0:
+; SSE3: pushq %rbx
+; SSE3: movaps %xmm0, -16(%rsp)
+; SSE3-NEXT: movzbl -1(%rsp), %eax
+; SSE3-NEXT: bsrl %eax, %eax
+; SSE3-NEXT: xorl $7, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: movzbl -2(%rsp), %edi
+; SSE3-NEXT: movzbl -3(%rsp), %edx
+; SSE3-NEXT: movzbl -4(%rsp), %r9d
+; SSE3-NEXT: movzbl -5(%rsp), %eax
+; SSE3-NEXT: movzbl -6(%rsp), %r10d
+; SSE3-NEXT: movzbl -7(%rsp), %ecx
+; SSE3-NEXT: movzbl -8(%rsp), %r8d
+; SSE3-NEXT: movzbl -9(%rsp), %esi
+; SSE3-NEXT: bsrl %esi, %esi
+; SSE3-NEXT: xorl $7, %esi
+; SSE3-NEXT: movd %esi, %xmm1
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE3-NEXT: bsrl %eax, %eax
+; SSE3-NEXT: xorl $7, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: movzbl -10(%rsp), %eax
+; SSE3-NEXT: movzbl -11(%rsp), %esi
+; SSE3-NEXT: movzbl -12(%rsp), %r11d
+; SSE3-NEXT: movzbl -13(%rsp), %ebx
+; SSE3-NEXT: bsrl %ebx, %ebx
+; SSE3-NEXT: xorl $7, %ebx
+; SSE3-NEXT: movd %ebx, %xmm2
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE3-NEXT: bsrl %edx, %edx
+; SSE3-NEXT: xorl $7, %edx
+; SSE3-NEXT: movd %edx, %xmm0
+; SSE3-NEXT: bsrl %esi, %edx
+; SSE3-NEXT: xorl $7, %edx
+; SSE3-NEXT: movd %edx, %xmm3
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE3-NEXT: bsrl %ecx, %ecx
+; SSE3-NEXT: xorl $7, %ecx
+; SSE3-NEXT: movd %ecx, %xmm0
+; SSE3-NEXT: movzbl -14(%rsp), %ecx
+; SSE3-NEXT: movzbl -15(%rsp), %edx
+; SSE3-NEXT: bsrl %edx, %edx
+; SSE3-NEXT: xorl $7, %edx
+; SSE3-NEXT: movd %edx, %xmm1
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE3-NEXT: bsrl %edi, %edx
+; SSE3-NEXT: xorl $7, %edx
+; SSE3-NEXT: movd %edx, %xmm0
+; SSE3-NEXT: bsrl %eax, %eax
+; SSE3-NEXT: xorl $7, %eax
+; SSE3-NEXT: movd %eax, %xmm2
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE3-NEXT: bsrl %r10d, %eax
+; SSE3-NEXT: xorl $7, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: bsrl %ecx, %eax
+; SSE3-NEXT: xorl $7, %eax
+; SSE3-NEXT: movd %eax, %xmm3
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE3-NEXT: bsrl %r9d, %eax
+; SSE3-NEXT: xorl $7, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: bsrl %r11d, %eax
+; SSE3-NEXT: xorl $7, %eax
+; SSE3-NEXT: movd %eax, %xmm2
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE3-NEXT: bsrl %r8d, %eax
+; SSE3-NEXT: xorl $7, %eax
+; SSE3-NEXT: movd %eax, %xmm4
+; SSE3-NEXT: movzbl -16(%rsp), %eax
+; SSE3-NEXT: bsrl %eax, %eax
+; SSE3-NEXT: xorl $7, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE3-NEXT: popq %rbx
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv16i8u:
+; SSSE3: # BB#0:
+; SSSE3: pushq %rbx
+; SSSE3: movaps %xmm0, -16(%rsp)
+; SSSE3-NEXT: movzbl -1(%rsp), %eax
+; SSSE3-NEXT: bsrl %eax, %eax
+; SSSE3-NEXT: xorl $7, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: movzbl -2(%rsp), %edi
+; SSSE3-NEXT: movzbl -3(%rsp), %edx
+; SSSE3-NEXT: movzbl -4(%rsp), %r9d
+; SSSE3-NEXT: movzbl -5(%rsp), %eax
+; SSSE3-NEXT: movzbl -6(%rsp), %r10d
+; SSSE3-NEXT: movzbl -7(%rsp), %ecx
+; SSSE3-NEXT: movzbl -8(%rsp), %r8d
+; SSSE3-NEXT: movzbl -9(%rsp), %esi
+; SSSE3-NEXT: bsrl %esi, %esi
+; SSSE3-NEXT: xorl $7, %esi
+; SSSE3-NEXT: movd %esi, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: bsrl %eax, %eax
+; SSSE3-NEXT: xorl $7, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: movzbl -10(%rsp), %eax
+; SSSE3-NEXT: movzbl -11(%rsp), %esi
+; SSSE3-NEXT: movzbl -12(%rsp), %r11d
+; SSSE3-NEXT: movzbl -13(%rsp), %ebx
+; SSSE3-NEXT: bsrl %ebx, %ebx
+; SSSE3-NEXT: xorl $7, %ebx
+; SSSE3-NEXT: movd %ebx, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSSE3-NEXT: bsrl %edx, %edx
+; SSSE3-NEXT: xorl $7, %edx
+; SSSE3-NEXT: movd %edx, %xmm0
+; SSSE3-NEXT: bsrl %esi, %edx
+; SSSE3-NEXT: xorl $7, %edx
+; SSSE3-NEXT: movd %edx, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSSE3-NEXT: bsrl %ecx, %ecx
+; SSSE3-NEXT: xorl $7, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: movzbl -14(%rsp), %ecx
+; SSSE3-NEXT: movzbl -15(%rsp), %edx
+; SSSE3-NEXT: bsrl %edx, %edx
+; SSSE3-NEXT: xorl $7, %edx
+; SSSE3-NEXT: movd %edx, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: bsrl %edi, %edx
+; SSSE3-NEXT: xorl $7, %edx
+; SSSE3-NEXT: movd %edx, %xmm0
+; SSSE3-NEXT: bsrl %eax, %eax
+; SSSE3-NEXT: xorl $7, %eax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT: bsrl %r10d, %eax
+; SSSE3-NEXT: xorl $7, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: bsrl %ecx, %eax
+; SSSE3-NEXT: xorl $7, %eax
+; SSSE3-NEXT: movd %eax, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSSE3-NEXT: bsrl %r9d, %eax
+; SSSE3-NEXT: xorl $7, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: bsrl %r11d, %eax
+; SSSE3-NEXT: xorl $7, %eax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT: bsrl %r8d, %eax
+; SSSE3-NEXT: xorl $7, %eax
+; SSSE3-NEXT: movd %eax, %xmm4
+; SSSE3-NEXT: movzbl -16(%rsp), %eax
+; SSSE3-NEXT: bsrl %eax, %eax
+; SSSE3-NEXT: xorl $7, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT: popq %rbx
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv16i8u:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrb $1, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pextrb $0, %xmm0, %ecx
+; SSE41-NEXT: bsrl %ecx, %ecx
+; SSE41-NEXT: xorl $7, %ecx
+; SSE41-NEXT: movd %ecx, %xmm1
+; SSE41-NEXT: pinsrb $1, %eax, %xmm1
+; SSE41-NEXT: pextrb $2, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pinsrb $2, %eax, %xmm1
+; SSE41-NEXT: pextrb $3, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pinsrb $3, %eax, %xmm1
+; SSE41-NEXT: pextrb $4, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pinsrb $4, %eax, %xmm1
+; SSE41-NEXT: pextrb $5, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pinsrb $5, %eax, %xmm1
+; SSE41-NEXT: pextrb $6, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pinsrb $6, %eax, %xmm1
+; SSE41-NEXT: pextrb $7, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pinsrb $7, %eax, %xmm1
+; SSE41-NEXT: pextrb $8, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pinsrb $8, %eax, %xmm1
+; SSE41-NEXT: pextrb $9, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pinsrb $9, %eax, %xmm1
+; SSE41-NEXT: pextrb $10, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pinsrb $10, %eax, %xmm1
+; SSE41-NEXT: pextrb $11, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pinsrb $11, %eax, %xmm1
+; SSE41-NEXT: pextrb $12, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pinsrb $12, %eax, %xmm1
+; SSE41-NEXT: pextrb $13, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pinsrb $13, %eax, %xmm1
+; SSE41-NEXT: pextrb $14, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pinsrb $14, %eax, %xmm1
+; SSE41-NEXT: pextrb $15, %xmm0, %eax
+; SSE41-NEXT: bsrl %eax, %eax
+; SSE41-NEXT: xorl $7, %eax
+; SSE41-NEXT: pinsrb $15, %eax, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv16i8u:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrb $1, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpextrb $0, %xmm0, %ecx
+; AVX-NEXT: bsrl %ecx, %ecx
+; AVX-NEXT: xorl $7, %ecx
+; AVX-NEXT: vmovd %ecx, %xmm1
+; AVX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $2, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $3, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $4, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $5, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $6, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $7, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $8, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $9, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $10, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $11, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $12, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $13, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $14, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $15, %xmm0, %eax
+; AVX-NEXT: bsrl %eax, %eax
+; AVX-NEXT: xorl $7, %eax
+; AVX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %in, i1 -1)
+ ret <16 x i8> %out
+define <2 x i64> @foldv2i64() {
+; SSE-LABEL: foldv2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movl $55, %eax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: retq
+; AVX-LABEL: foldv2i64:
+; AVX: # BB#0:
+; AVX-NEXT: movl $55, %eax
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: retq
+ %out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> <i64 256, i64 -1>, i1 0)
+ ret <2 x i64> %out
+define <2 x i64> @foldv2i64u() {
+; SSE-LABEL: foldv2i64u:
+; SSE: # BB#0:
+; SSE-NEXT: movl $55, %eax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: retq
+; AVX-LABEL: foldv2i64u:
+; AVX: # BB#0:
+; AVX-NEXT: movl $55, %eax
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: retq
+ %out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> <i64 256, i64 -1>, i1 -1)
+ ret <2 x i64> %out
+define <4 x i32> @foldv4i32() {
+; SSE-LABEL: foldv4i32:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [23,0,32,24]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
+; AVX-NEXT: retq
+ %out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> <i32 256, i32 -1, i32 0, i32 255>, i1 0)
+ ret <4 x i32> %out
+define <4 x i32> @foldv4i32u() {
+; SSE-LABEL: foldv4i32u:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [23,0,32,24]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv4i32u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
+; AVX-NEXT: retq
+ %out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> <i32 256, i32 -1, i32 0, i32 255>, i1 -1)
+ ret <4 x i32> %out
+define <8 x i16> @foldv8i16() {
+; SSE-LABEL: foldv8i16:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; AVX-NEXT: retq
+ %out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88>, i1 0)
+ ret <8 x i16> %out
+define <8 x i16> @foldv8i16u() {
+; SSE-LABEL: foldv8i16u:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv8i16u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; AVX-NEXT: retq
+ %out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88>, i1 -1)
+ ret <8 x i16> %out
+define <16 x i8> @foldv16i8() {
+; SSE-LABEL: foldv16i8:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; AVX-NEXT: retq
+ %out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32>, i1 0)
+ ret <16 x i8> %out
+define <16 x i8> @foldv16i8u() {
+; SSE-LABEL: foldv16i8u:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv16i8u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; AVX-NEXT: retq
+ %out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32>, i1 -1)
+ ret <16 x i8> %out
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)
diff --git a/test/CodeGen/X86/vector-lzcnt-256.ll b/test/CodeGen/X86/vector-lzcnt-256.ll
new file mode 100644
index 0000000..48abe12
--- /dev/null
+++ b/test/CodeGen/X86/vector-lzcnt-256.ll
@@ -0,0 +1,1305 @@
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+target triple = "x86_64-unknown-unknown"
+define <4 x i64> @testv4i64(<4 x i64> %in) {
+; AVX1-LABEL: testv4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrq $1, %xmm1, %rax
+; AVX1-NEXT: bsrq %rax, %rax
+; AVX1-NEXT: movl $127, %ecx
+; AVX1-NEXT: cmoveq %rcx, %rax
+; AVX1-NEXT: xorq $63, %rax
+; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovq %xmm1, %rax
+; AVX1-NEXT: bsrq %rax, %rax
+; AVX1-NEXT: cmoveq %rcx, %rax
+; AVX1-NEXT: xorq $63, %rax
+; AVX1-NEXT: vmovq %rax, %xmm1
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: bsrq %rax, %rax
+; AVX1-NEXT: cmoveq %rcx, %rax
+; AVX1-NEXT: xorq $63, %rax
+; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: bsrq %rax, %rax
+; AVX1-NEXT: cmoveq %rcx, %rax
+; AVX1-NEXT: xorq $63, %rax
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrq $1, %xmm1, %rax
+; AVX2-NEXT: bsrq %rax, %rax
+; AVX2-NEXT: movl $127, %ecx
+; AVX2-NEXT: cmoveq %rcx, %rax
+; AVX2-NEXT: xorq $63, %rax
+; AVX2-NEXT: vmovq %rax, %xmm2
+; AVX2-NEXT: vmovq %xmm1, %rax
+; AVX2-NEXT: bsrq %rax, %rax
+; AVX2-NEXT: cmoveq %rcx, %rax
+; AVX2-NEXT: xorq $63, %rax
+; AVX2-NEXT: vmovq %rax, %xmm1
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT: vpextrq $1, %xmm0, %rax
+; AVX2-NEXT: bsrq %rax, %rax
+; AVX2-NEXT: cmoveq %rcx, %rax
+; AVX2-NEXT: xorq $63, %rax
+; AVX2-NEXT: vmovq %rax, %xmm2
+; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: bsrq %rax, %rax
+; AVX2-NEXT: cmoveq %rcx, %rax
+; AVX2-NEXT: xorq $63, %rax
+; AVX2-NEXT: vmovq %rax, %xmm0
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 0)
+ ret <4 x i64> %out
+define <4 x i64> @testv4i64u(<4 x i64> %in) {
+; AVX1-LABEL: testv4i64u:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrq $1, %xmm1, %rax
+; AVX1-NEXT: bsrq %rax, %rax
+; AVX1-NEXT: xorq $63, %rax
+; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovq %xmm1, %rax
+; AVX1-NEXT: bsrq %rax, %rax
+; AVX1-NEXT: xorq $63, %rax
+; AVX1-NEXT: vmovq %rax, %xmm1
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: bsrq %rax, %rax
+; AVX1-NEXT: xorq $63, %rax
+; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: bsrq %rax, %rax
+; AVX1-NEXT: xorq $63, %rax
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv4i64u:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrq $1, %xmm1, %rax
+; AVX2-NEXT: bsrq %rax, %rax
+; AVX2-NEXT: xorq $63, %rax
+; AVX2-NEXT: vmovq %rax, %xmm2
+; AVX2-NEXT: vmovq %xmm1, %rax
+; AVX2-NEXT: bsrq %rax, %rax
+; AVX2-NEXT: xorq $63, %rax
+; AVX2-NEXT: vmovq %rax, %xmm1
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT: vpextrq $1, %xmm0, %rax
+; AVX2-NEXT: bsrq %rax, %rax
+; AVX2-NEXT: xorq $63, %rax
+; AVX2-NEXT: vmovq %rax, %xmm2
+; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: bsrq %rax, %rax
+; AVX2-NEXT: xorq $63, %rax
+; AVX2-NEXT: vmovq %rax, %xmm0
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 -1)
+ ret <4 x i64> %out
+define <8 x i32> @testv8i32(<8 x i32> %in) {
+; AVX1-LABEL: testv8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrd $1, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %ecx
+; AVX1-NEXT: movl $63, %eax
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $31, %ecx
+; AVX1-NEXT: vmovd %xmm1, %edx
+; AVX1-NEXT: bsrl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: xorl $31, %edx
+; AVX1-NEXT: vmovd %edx, %xmm2
+; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $2, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $31, %ecx
+; AVX1-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $3, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $31, %ecx
+; AVX1-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm1
+; AVX1-NEXT: vpextrd $1, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $31, %ecx
+; AVX1-NEXT: vmovd %xmm0, %edx
+; AVX1-NEXT: bsrl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: xorl $31, %edx
+; AVX1-NEXT: vmovd %edx, %xmm2
+; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $2, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $31, %ecx
+; AVX1-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $3, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $31, %ecx
+; AVX1-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrd $1, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %ecx
+; AVX2-NEXT: movl $63, %eax
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $31, %ecx
+; AVX2-NEXT: vmovd %xmm1, %edx
+; AVX2-NEXT: bsrl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: xorl $31, %edx
+; AVX2-NEXT: vmovd %edx, %xmm2
+; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $2, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $31, %ecx
+; AVX2-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $3, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $31, %ecx
+; AVX2-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm1
+; AVX2-NEXT: vpextrd $1, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $31, %ecx
+; AVX2-NEXT: vmovd %xmm0, %edx
+; AVX2-NEXT: bsrl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: xorl $31, %edx
+; AVX2-NEXT: vmovd %edx, %xmm2
+; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $2, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $31, %ecx
+; AVX2-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $3, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $31, %ecx
+; AVX2-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 0)
+ ret <8 x i32> %out
+define <8 x i32> @testv8i32u(<8 x i32> %in) {
+; AVX1-LABEL: testv8i32u:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrd $1, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $31, %eax
+; AVX1-NEXT: vmovd %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: xorl $31, %ecx
+; AVX1-NEXT: vmovd %ecx, %xmm2
+; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $2, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $31, %eax
+; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $3, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $31, %eax
+; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
+; AVX1-NEXT: vpextrd $1, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $31, %eax
+; AVX1-NEXT: vmovd %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: xorl $31, %ecx
+; AVX1-NEXT: vmovd %ecx, %xmm2
+; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $2, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $31, %eax
+; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $3, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $31, %eax
+; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv8i32u:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrd $1, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $31, %eax
+; AVX2-NEXT: vmovd %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: xorl $31, %ecx
+; AVX2-NEXT: vmovd %ecx, %xmm2
+; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $2, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $31, %eax
+; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $3, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $31, %eax
+; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
+; AVX2-NEXT: vpextrd $1, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $31, %eax
+; AVX2-NEXT: vmovd %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: xorl $31, %ecx
+; AVX2-NEXT: vmovd %ecx, %xmm2
+; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $2, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $31, %eax
+; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $3, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $31, %eax
+; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 -1)
+ ret <8 x i32> %out
+define <16 x i16> @testv16i16(<16 x i16> %in) {
+; AVX1-LABEL: testv16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrw $1, %xmm1, %eax
+; AVX1-NEXT: bsrw %ax, %cx
+; AVX1-NEXT: movw $31, %ax
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vmovd %xmm1, %edx
+; AVX1-NEXT: bsrw %dx, %dx
+; AVX1-NEXT: cmovew %ax, %dx
+; AVX1-NEXT: xorl $15, %edx
+; AVX1-NEXT: vmovd %edx, %xmm2
+; AVX1-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $2, %xmm1, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $3, %xmm1, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $4, %xmm1, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $5, %xmm1, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $6, %xmm1, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $7, %xmm1, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm1
+; AVX1-NEXT: vpextrw $1, %xmm0, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vmovd %xmm0, %edx
+; AVX1-NEXT: bsrw %dx, %dx
+; AVX1-NEXT: cmovew %ax, %dx
+; AVX1-NEXT: xorl $15, %edx
+; AVX1-NEXT: vmovd %edx, %xmm2
+; AVX1-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $2, %xmm0, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $3, %xmm0, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $4, %xmm0, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $5, %xmm0, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $6, %xmm0, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $7, %xmm0, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrw $1, %xmm1, %eax
+; AVX2-NEXT: bsrw %ax, %cx
+; AVX2-NEXT: movw $31, %ax
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vmovd %xmm1, %edx
+; AVX2-NEXT: bsrw %dx, %dx
+; AVX2-NEXT: cmovew %ax, %dx
+; AVX2-NEXT: xorl $15, %edx
+; AVX2-NEXT: vmovd %edx, %xmm2
+; AVX2-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $2, %xmm1, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $3, %xmm1, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $4, %xmm1, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $5, %xmm1, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $6, %xmm1, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $7, %xmm1, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm1
+; AVX2-NEXT: vpextrw $1, %xmm0, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vmovd %xmm0, %edx
+; AVX2-NEXT: bsrw %dx, %dx
+; AVX2-NEXT: cmovew %ax, %dx
+; AVX2-NEXT: xorl $15, %edx
+; AVX2-NEXT: vmovd %edx, %xmm2
+; AVX2-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $2, %xmm0, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $3, %xmm0, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $4, %xmm0, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $5, %xmm0, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $6, %xmm0, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $7, %xmm0, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 0)
+ ret <16 x i16> %out
+define <16 x i16> @testv16i16u(<16 x i16> %in) {
+; AVX1-LABEL: testv16i16u:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrw $1, %xmm1, %eax
+; AVX1-NEXT: bsrw %ax, %ax
+; AVX1-NEXT: xorl $15, %eax
+; AVX1-NEXT: vmovd %xmm1, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vmovd %ecx, %xmm2
+; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $2, %xmm1, %eax
+; AVX1-NEXT: bsrw %ax, %ax
+; AVX1-NEXT: xorl $15, %eax
+; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $3, %xmm1, %eax
+; AVX1-NEXT: bsrw %ax, %ax
+; AVX1-NEXT: xorl $15, %eax
+; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $4, %xmm1, %eax
+; AVX1-NEXT: bsrw %ax, %ax
+; AVX1-NEXT: xorl $15, %eax
+; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $5, %xmm1, %eax
+; AVX1-NEXT: bsrw %ax, %ax
+; AVX1-NEXT: xorl $15, %eax
+; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $6, %xmm1, %eax
+; AVX1-NEXT: bsrw %ax, %ax
+; AVX1-NEXT: xorl $15, %eax
+; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $7, %xmm1, %eax
+; AVX1-NEXT: bsrw %ax, %ax
+; AVX1-NEXT: xorl $15, %eax
+; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
+; AVX1-NEXT: vpextrw $1, %xmm0, %eax
+; AVX1-NEXT: bsrw %ax, %ax
+; AVX1-NEXT: xorl $15, %eax
+; AVX1-NEXT: vmovd %xmm0, %ecx
+; AVX1-NEXT: bsrw %cx, %cx
+; AVX1-NEXT: xorl $15, %ecx
+; AVX1-NEXT: vmovd %ecx, %xmm2
+; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $2, %xmm0, %eax
+; AVX1-NEXT: bsrw %ax, %ax
+; AVX1-NEXT: xorl $15, %eax
+; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $3, %xmm0, %eax
+; AVX1-NEXT: bsrw %ax, %ax
+; AVX1-NEXT: xorl $15, %eax
+; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $4, %xmm0, %eax
+; AVX1-NEXT: bsrw %ax, %ax
+; AVX1-NEXT: xorl $15, %eax
+; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $5, %xmm0, %eax
+; AVX1-NEXT: bsrw %ax, %ax
+; AVX1-NEXT: xorl $15, %eax
+; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $6, %xmm0, %eax
+; AVX1-NEXT: bsrw %ax, %ax
+; AVX1-NEXT: xorl $15, %eax
+; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $7, %xmm0, %eax
+; AVX1-NEXT: bsrw %ax, %ax
+; AVX1-NEXT: xorl $15, %eax
+; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv16i16u:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrw $1, %xmm1, %eax
+; AVX2-NEXT: bsrw %ax, %ax
+; AVX2-NEXT: xorl $15, %eax
+; AVX2-NEXT: vmovd %xmm1, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vmovd %ecx, %xmm2
+; AVX2-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $2, %xmm1, %eax
+; AVX2-NEXT: bsrw %ax, %ax
+; AVX2-NEXT: xorl $15, %eax
+; AVX2-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $3, %xmm1, %eax
+; AVX2-NEXT: bsrw %ax, %ax
+; AVX2-NEXT: xorl $15, %eax
+; AVX2-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $4, %xmm1, %eax
+; AVX2-NEXT: bsrw %ax, %ax
+; AVX2-NEXT: xorl $15, %eax
+; AVX2-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $5, %xmm1, %eax
+; AVX2-NEXT: bsrw %ax, %ax
+; AVX2-NEXT: xorl $15, %eax
+; AVX2-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $6, %xmm1, %eax
+; AVX2-NEXT: bsrw %ax, %ax
+; AVX2-NEXT: xorl $15, %eax
+; AVX2-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $7, %xmm1, %eax
+; AVX2-NEXT: bsrw %ax, %ax
+; AVX2-NEXT: xorl $15, %eax
+; AVX2-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
+; AVX2-NEXT: vpextrw $1, %xmm0, %eax
+; AVX2-NEXT: bsrw %ax, %ax
+; AVX2-NEXT: xorl $15, %eax
+; AVX2-NEXT: vmovd %xmm0, %ecx
+; AVX2-NEXT: bsrw %cx, %cx
+; AVX2-NEXT: xorl $15, %ecx
+; AVX2-NEXT: vmovd %ecx, %xmm2
+; AVX2-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $2, %xmm0, %eax
+; AVX2-NEXT: bsrw %ax, %ax
+; AVX2-NEXT: xorl $15, %eax
+; AVX2-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $3, %xmm0, %eax
+; AVX2-NEXT: bsrw %ax, %ax
+; AVX2-NEXT: xorl $15, %eax
+; AVX2-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $4, %xmm0, %eax
+; AVX2-NEXT: bsrw %ax, %ax
+; AVX2-NEXT: xorl $15, %eax
+; AVX2-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $5, %xmm0, %eax
+; AVX2-NEXT: bsrw %ax, %ax
+; AVX2-NEXT: xorl $15, %eax
+; AVX2-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $6, %xmm0, %eax
+; AVX2-NEXT: bsrw %ax, %ax
+; AVX2-NEXT: xorl $15, %eax
+; AVX2-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $7, %xmm0, %eax
+; AVX2-NEXT: bsrw %ax, %ax
+; AVX2-NEXT: xorl $15, %eax
+; AVX2-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 -1)
+ ret <16 x i16> %out
+define <32 x i8> @testv32i8(<32 x i8> %in) {
+; AVX1-LABEL: testv32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrb $1, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %ecx
+; AVX1-NEXT: movl $15, %eax
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpextrb $0, %xmm1, %edx
+; AVX1-NEXT: bsrl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: xorl $7, %edx
+; AVX1-NEXT: vmovd %edx, %xmm2
+; AVX1-NEXT: vpinsrb $1, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $2, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $2, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $3, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $4, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $4, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $5, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $6, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $6, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $7, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $7, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $8, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $9, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $10, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $10, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $11, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $12, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $13, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $14, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $15, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $1, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpextrb $0, %xmm0, %edx
+; AVX1-NEXT: bsrl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: xorl $7, %edx
+; AVX1-NEXT: vmovd %edx, %xmm2
+; AVX1-NEXT: vpinsrb $1, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $2, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $2, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $3, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $4, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $4, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $5, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $6, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $6, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $7, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $7, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $8, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $9, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $10, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $10, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $11, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $12, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $13, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $14, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $15, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrb $1, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %ecx
+; AVX2-NEXT: movl $15, %eax
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpextrb $0, %xmm1, %edx
+; AVX2-NEXT: bsrl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: xorl $7, %edx
+; AVX2-NEXT: vmovd %edx, %xmm2
+; AVX2-NEXT: vpinsrb $1, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $2, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $2, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $3, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $4, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $4, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $5, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $6, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $6, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $7, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $7, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $8, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $9, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $10, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $10, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $11, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $12, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $13, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $14, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $15, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm1
+; AVX2-NEXT: vpextrb $1, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpextrb $0, %xmm0, %edx
+; AVX2-NEXT: bsrl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: xorl $7, %edx
+; AVX2-NEXT: vmovd %edx, %xmm2
+; AVX2-NEXT: vpinsrb $1, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $2, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $2, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $3, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $4, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $4, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $5, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $6, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $6, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $7, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $7, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $9, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $10, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $10, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $11, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $12, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $13, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $14, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $15, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 0)
+ ret <32 x i8> %out
+define <32 x i8> @testv32i8u(<32 x i8> %in) {
+; AVX1-LABEL: testv32i8u:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrb $1, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vmovd %ecx, %xmm2
+; AVX1-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $2, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $3, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $4, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $5, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $6, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $7, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $8, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $9, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $10, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $11, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $12, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $13, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $14, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $15, %xmm1, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $1, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
+; AVX1-NEXT: bsrl %ecx, %ecx
+; AVX1-NEXT: xorl $7, %ecx
+; AVX1-NEXT: vmovd %ecx, %xmm2
+; AVX1-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $2, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $3, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $4, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $5, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $6, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $7, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $8, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $9, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $10, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $11, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $12, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $13, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $14, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $15, %xmm0, %eax
+; AVX1-NEXT: bsrl %eax, %eax
+; AVX1-NEXT: xorl $7, %eax
+; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv32i8u:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrb $1, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vmovd %ecx, %xmm2
+; AVX2-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $2, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $3, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $4, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $5, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $6, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $7, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $8, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $9, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $10, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $11, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $12, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $13, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $14, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $15, %xmm1, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
+; AVX2-NEXT: vpextrb $1, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
+; AVX2-NEXT: bsrl %ecx, %ecx
+; AVX2-NEXT: xorl $7, %ecx
+; AVX2-NEXT: vmovd %ecx, %xmm2
+; AVX2-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $2, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $3, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $4, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $5, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $6, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $7, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $8, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $9, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $10, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $11, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $12, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $13, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $14, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $15, %xmm0, %eax
+; AVX2-NEXT: bsrl %eax, %eax
+; AVX2-NEXT: xorl $7, %eax
+; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 -1)
+ ret <32 x i8> %out
+define <4 x i64> @foldv4i64() {
+; AVX-LABEL: foldv4i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
+; AVX-NEXT: retq
+ %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
+ ret <4 x i64> %out
+define <4 x i64> @foldv4i64u() {
+; AVX-LABEL: foldv4i64u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
+; AVX-NEXT: retq
+ %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
+ ret <4 x i64> %out
+define <8 x i32> @foldv8i32() {
+; AVX-LABEL: foldv8i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
+; AVX-NEXT: retq
+ %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
+ ret <8 x i32> %out
+define <8 x i32> @foldv8i32u() {
+; AVX-LABEL: foldv8i32u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
+; AVX-NEXT: retq
+ %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
+ ret <8 x i32> %out
+define <16 x i16> @foldv16i16() {
+; AVX-LABEL: foldv16i16:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
+; AVX-NEXT: retq
+ %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
+ ret <16 x i16> %out
+define <16 x i16> @foldv16i16u() {
+; AVX-LABEL: foldv16i16u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
+; AVX-NEXT: retq
+ %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
+ ret <16 x i16> %out
+define <32 x i8> @foldv32i8() {
+; AVX-LABEL: foldv32i8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
+; AVX-NEXT: retq
+ %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
+ ret <32 x i8> %out
+define <32 x i8> @foldv32i8u() {
+; AVX-LABEL: foldv32i8u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
+; AVX-NEXT: retq
+ %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
+ ret <32 x i8> %out
+declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
+declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
+declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
+declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)
diff --git a/test/CodeGen/X86/vector-popcnt-128.ll b/test/CodeGen/X86/vector-popcnt-128.ll
new file mode 100644
index 0000000..fef445d
--- /dev/null
+++ b/test/CodeGen/X86/vector-popcnt-128.ll
@@ -0,0 +1,462 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+target triple = "x86_64-unknown-unknown"
+define <2 x i64> @testv2i64(<2 x i64> %in) {
+; SSE2-LABEL: testv2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrlq $1, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: psubq %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pand %xmm1, %xmm2
+; SSE2-NEXT: psrlq $2, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: paddq %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrlq $4, %xmm1
+; SSE2-NEXT: paddq %xmm0, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: psadbw %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv2i64:
+; SSE3: # BB#0:
+; SSE3-NEXT: movdqa %xmm0, %xmm1
+; SSE3-NEXT: psrlq $1, %xmm1
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE3-NEXT: psubq %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
+; SSE3-NEXT: movdqa %xmm0, %xmm2
+; SSE3-NEXT: pand %xmm1, %xmm2
+; SSE3-NEXT: psrlq $2, %xmm0
+; SSE3-NEXT: pand %xmm1, %xmm0
+; SSE3-NEXT: paddq %xmm2, %xmm0
+; SSE3-NEXT: movdqa %xmm0, %xmm1
+; SSE3-NEXT: psrlq $4, %xmm1
+; SSE3-NEXT: paddq %xmm0, %xmm1
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE3-NEXT: pxor %xmm0, %xmm0
+; SSE3-NEXT: psadbw %xmm0, %xmm1
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv2i64:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT: movdqa %xmm0, %xmm2
+; SSSE3-NEXT: pand %xmm1, %xmm2
+; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT: movdqa %xmm3, %xmm4
+; SSSE3-NEXT: pshufb %xmm2, %xmm4
+; SSSE3-NEXT: psrlw $4, %xmm0
+; SSSE3-NEXT: pand %xmm1, %xmm0
+; SSSE3-NEXT: pshufb %xmm0, %xmm3
+; SSSE3-NEXT: paddb %xmm4, %xmm3
+; SSSE3-NEXT: pxor %xmm0, %xmm0
+; SSSE3-NEXT: psadbw %xmm3, %xmm0
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pand %xmm1, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT: movdqa %xmm3, %xmm4
+; SSE41-NEXT: pshufb %xmm2, %xmm4
+; SSE41-NEXT: psrlw $4, %xmm0
+; SSE41-NEXT: pand %xmm1, %xmm0
+; SSE41-NEXT: pshufb %xmm0, %xmm3
+; SSE41-NEXT: paddb %xmm4, %xmm3
+; SSE41-NEXT: pxor %xmm0, %xmm0
+; SSE41-NEXT: psadbw %xmm3, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %out = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %in)
+ ret <2 x i64> %out
+define <4 x i32> @testv4i32(<4 x i32> %in) {
+; SSE2-LABEL: testv4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrld $1, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: psubd %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,858993459,858993459]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pand %xmm1, %xmm2
+; SSE2-NEXT: psrld $2, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: paddd %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrld $4, %xmm1
+; SSE2-NEXT: paddd %xmm0, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT: psadbw %xmm0, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: psadbw %xmm0, %xmm1
+; SSE2-NEXT: packuswb %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv4i32:
+; SSE3: # BB#0:
+; SSE3-NEXT: movdqa %xmm0, %xmm1
+; SSE3-NEXT: psrld $1, %xmm1
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE3-NEXT: psubd %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,858993459,858993459]
+; SSE3-NEXT: movdqa %xmm0, %xmm2
+; SSE3-NEXT: pand %xmm1, %xmm2
+; SSE3-NEXT: psrld $2, %xmm0
+; SSE3-NEXT: pand %xmm1, %xmm0
+; SSE3-NEXT: paddd %xmm2, %xmm0
+; SSE3-NEXT: movdqa %xmm0, %xmm1
+; SSE3-NEXT: psrld $4, %xmm1
+; SSE3-NEXT: paddd %xmm0, %xmm1
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE3-NEXT: pxor %xmm0, %xmm0
+; SSE3-NEXT: movdqa %xmm1, %xmm2
+; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE3-NEXT: psadbw %xmm0, %xmm2
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE3-NEXT: psadbw %xmm0, %xmm1
+; SSE3-NEXT: packuswb %xmm2, %xmm1
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv4i32:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT: movdqa %xmm0, %xmm3
+; SSSE3-NEXT: pand %xmm2, %xmm3
+; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT: movdqa %xmm1, %xmm4
+; SSSE3-NEXT: pshufb %xmm3, %xmm4
+; SSSE3-NEXT: psrlw $4, %xmm0
+; SSSE3-NEXT: pand %xmm2, %xmm0
+; SSSE3-NEXT: pshufb %xmm0, %xmm1
+; SSSE3-NEXT: paddb %xmm4, %xmm1
+; SSSE3-NEXT: pxor %xmm0, %xmm0
+; SSSE3-NEXT: movdqa %xmm1, %xmm2
+; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSSE3-NEXT: psadbw %xmm0, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: psadbw %xmm0, %xmm1
+; SSSE3-NEXT: packuswb %xmm2, %xmm1
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: pand %xmm2, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT: movdqa %xmm1, %xmm4
+; SSE41-NEXT: pshufb %xmm3, %xmm4
+; SSE41-NEXT: psrlw $4, %xmm0
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: pshufb %xmm0, %xmm1
+; SSE41-NEXT: paddb %xmm4, %xmm1
+; SSE41-NEXT: pxor %xmm0, %xmm0
+; SSE41-NEXT: movdqa %xmm1, %xmm2
+; SSE41-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE41-NEXT: psadbw %xmm0, %xmm2
+; SSE41-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE41-NEXT: psadbw %xmm0, %xmm1
+; SSE41-NEXT: packuswb %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT: vpsadbw %xmm2, %xmm1, %xmm2
+; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %out = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %in)
+ ret <4 x i32> %out
+define <8 x i16> @testv8i16(<8 x i16> %in) {
+; SSE2-LABEL: testv8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrlw $1, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: psubw %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pand %xmm1, %xmm2
+; SSE2-NEXT: psrlw $2, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: paddw %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrlw $4, %xmm1
+; SSE2-NEXT: paddw %xmm0, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: psllw $8, %xmm0
+; SSE2-NEXT: paddb %xmm1, %xmm0
+; SSE2-NEXT: psrlw $8, %xmm0
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv8i16:
+; SSE3: # BB#0:
+; SSE3-NEXT: movdqa %xmm0, %xmm1
+; SSE3-NEXT: psrlw $1, %xmm1
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE3-NEXT: psubw %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE3-NEXT: movdqa %xmm0, %xmm2
+; SSE3-NEXT: pand %xmm1, %xmm2
+; SSE3-NEXT: psrlw $2, %xmm0
+; SSE3-NEXT: pand %xmm1, %xmm0
+; SSE3-NEXT: paddw %xmm2, %xmm0
+; SSE3-NEXT: movdqa %xmm0, %xmm1
+; SSE3-NEXT: psrlw $4, %xmm1
+; SSE3-NEXT: paddw %xmm0, %xmm1
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: psllw $8, %xmm0
+; SSE3-NEXT: paddb %xmm1, %xmm0
+; SSE3-NEXT: psrlw $8, %xmm0
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv8i16:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT: movdqa %xmm0, %xmm2
+; SSSE3-NEXT: pand %xmm1, %xmm2
+; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT: movdqa %xmm3, %xmm4
+; SSSE3-NEXT: pshufb %xmm2, %xmm4
+; SSSE3-NEXT: psrlw $4, %xmm0
+; SSSE3-NEXT: pand %xmm1, %xmm0
+; SSSE3-NEXT: pshufb %xmm0, %xmm3
+; SSSE3-NEXT: paddb %xmm4, %xmm3
+; SSSE3-NEXT: movdqa %xmm3, %xmm0
+; SSSE3-NEXT: psllw $8, %xmm0
+; SSSE3-NEXT: paddb %xmm3, %xmm0
+; SSSE3-NEXT: psrlw $8, %xmm0
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pand %xmm1, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT: movdqa %xmm3, %xmm4
+; SSE41-NEXT: pshufb %xmm2, %xmm4
+; SSE41-NEXT: psrlw $4, %xmm0
+; SSE41-NEXT: pand %xmm1, %xmm0
+; SSE41-NEXT: pshufb %xmm0, %xmm3
+; SSE41-NEXT: paddb %xmm4, %xmm3
+; SSE41-NEXT: movdqa %xmm3, %xmm0
+; SSE41-NEXT: psllw $8, %xmm0
+; SSE41-NEXT: paddb %xmm3, %xmm0
+; SSE41-NEXT: psrlw $8, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsllw $8, %xmm0, %xmm1
+; AVX-NEXT: vpaddb %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %out = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %in)
+ ret <8 x i16> %out
+define <16 x i8> @testv16i8(<16 x i8> %in) {
+; SSE2-LABEL: testv16i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrlw $1, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: psubb %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pand %xmm1, %xmm2
+; SSE2-NEXT: psrlw $2, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrlw $4, %xmm1
+; SSE2-NEXT: paddb %xmm0, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv16i8:
+; SSE3: # BB#0:
+; SSE3-NEXT: movdqa %xmm0, %xmm1
+; SSE3-NEXT: psrlw $1, %xmm1
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE3-NEXT: psubb %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; SSE3-NEXT: movdqa %xmm0, %xmm2
+; SSE3-NEXT: pand %xmm1, %xmm2
+; SSE3-NEXT: psrlw $2, %xmm0
+; SSE3-NEXT: pand %xmm1, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
+; SSE3-NEXT: movdqa %xmm0, %xmm1
+; SSE3-NEXT: psrlw $4, %xmm1
+; SSE3-NEXT: paddb %xmm0, %xmm1
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv16i8:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT: movdqa %xmm0, %xmm3
+; SSSE3-NEXT: pand %xmm2, %xmm3
+; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT: movdqa %xmm1, %xmm4
+; SSSE3-NEXT: pshufb %xmm3, %xmm4
+; SSSE3-NEXT: psrlw $4, %xmm0
+; SSSE3-NEXT: pand %xmm2, %xmm0
+; SSSE3-NEXT: pshufb %xmm0, %xmm1
+; SSSE3-NEXT: paddb %xmm4, %xmm1
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv16i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: pand %xmm2, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT: movdqa %xmm1, %xmm4
+; SSE41-NEXT: pshufb %xmm3, %xmm4
+; SSE41-NEXT: psrlw $4, %xmm0
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: pshufb %xmm0, %xmm1
+; SSE41-NEXT: paddb %xmm4, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %out = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %in)
+ ret <16 x i8> %out
+define <2 x i64> @foldv2i64() {
+; SSE-LABEL: foldv2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,64]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,64]
+; AVX-NEXT: retq
+ %out = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> <i64 256, i64 -1>)
+ ret <2 x i64> %out
+define <4 x i32> @foldv4i32() {
+; SSE-LABEL: foldv4i32:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,32,0,8]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,32,0,8]
+; AVX-NEXT: retq
+ %out = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> <i32 256, i32 -1, i32 0, i32 255>)
+ ret <4 x i32> %out
+define <8 x i16> @foldv8i16() {
+; SSE-LABEL: foldv8i16:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,16,0,8,0,3,2,3]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,16,0,8,0,3,2,3]
+; AVX-NEXT: retq
+ %out = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88>)
+ ret <8 x i16> %out
+define <16 x i8> @foldv16i8() {
+; SSE-LABEL: foldv16i8:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1]
+; AVX-NEXT: retq
+ %out = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32>)
+ ret <16 x i8> %out
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
diff --git a/test/CodeGen/X86/vector-popcnt-256.ll b/test/CodeGen/X86/vector-popcnt-256.ll
new file mode 100644
index 0000000..7ce4f71
--- /dev/null
+++ b/test/CodeGen/X86/vector-popcnt-256.ll
@@ -0,0 +1,220 @@
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+target triple = "x86_64-unknown-unknown"
+define <4 x i64> @testv4i64(<4 x i64> %in) {
+; AVX1-LABEL: testv4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpsadbw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpsadbw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpsadbw %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+ %out = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %in)
+ ret <4 x i64> %out
+define <8 x i32> @testv8i32(<8 x i32> %in) {
+; AVX1-LABEL: testv8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; AVX1-NEXT: vpsadbw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; AVX1-NEXT: vpsadbw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; AVX2-NEXT: vpsadbw %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; AVX2-NEXT: vpsadbw %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %in)
+ ret <8 x i32> %out
+define <16 x i16> @testv16i16(<16 x i16> %in) {
+; AVX1-LABEL: testv16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
+; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
+; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
+; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4
+; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4
+; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1
+; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %in)
+ ret <16 x i16> %out
+define <32 x i8> @testv32i8(<32 x i8> %in) {
+; AVX1-LABEL: testv32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %in)
+ ret <32 x i8> %out
+define <4 x i64> @foldv4i64() {
+; AVX-LABEL: foldv4i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,64,0,8]
+; AVX-NEXT: retq
+ %out = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>)
+ ret <4 x i64> %out
+define <8 x i32> @foldv8i32() {
+; AVX-LABEL: foldv8i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,32,0,8,16,3,2,3]
+; AVX-NEXT: retq
+ %out = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>)
+ ret <8 x i32> %out
+define <16 x i16> @foldv16i16() {
+; AVX-LABEL: foldv16i16:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,16,0,8,0,3,2,3,15,7,1,1,1,1,1,1]
+; AVX-NEXT: retq
+ %out = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>)
+ ret <16 x i16> %out
+define <32 x i8> @foldv32i8() {
+; AVX-LABEL: foldv32i8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1,1,1,0,0,1,2,3,4,5,6,7,8,2,2,3,7]
+; AVX-NEXT: retq
+ %out = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>)
+ ret <32 x i8> %out
+declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
+declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
+declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
+declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 1b42a63..944ec4b 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -843,7 +843,6 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; ALL-LABEL: insert_reg_and_zero_v4f64:
; ALL: # BB#0:
-; ALL-NEXT: # kill: XMM0<def> XMM0<kill> YMM0<def>
; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; ALL-NEXT: retq
diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 62d4af7..8dc7623 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -15,9 +15,8 @@ define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00000010:
; ALL: # BB#0:
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
ret <8 x double> %shuffle
@@ -26,9 +25,8 @@ define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00000200:
; ALL: # BB#0:
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
ret <8 x double> %shuffle
@@ -37,9 +35,8 @@ define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00003000:
; ALL: # BB#0:
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,0,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
ret <8 x double> %shuffle
@@ -48,11 +45,8 @@ define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00040000:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vbroadcastsd %xmm1, %ymm1
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
ret <8 x double> %shuffle
@@ -61,11 +55,8 @@ define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00500000:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,0]
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x double> %shuffle
@@ -74,11 +65,8 @@ define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_06000000:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2],ymm0[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,0,0]
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x double> %shuffle
@@ -87,11 +75,11 @@ define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_70000000:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,0,0,0]
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; ALL-NEXT: movl $7, %eax
+; ALL-NEXT: vpinsrq $0, %rax, %xmm1, %xmm2
+; ALL-NEXT: vinserti32x4 $0, %xmm2, %zmm1, %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x double> %shuffle
@@ -100,10 +88,7 @@ define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_01014545:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: vpermpd $68, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
ret <8 x double> %shuffle
@@ -112,9 +97,8 @@ define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00112233:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,1,1]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,3,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
ret <8 x double> %shuffle
@@ -123,9 +107,8 @@ define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00001111:
; ALL: # BB#0:
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
ret <8 x double> %shuffle
@@ -134,11 +117,7 @@ define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_81a3c5e7(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_81a3c5e7:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vshufpd $170, %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
ret <8 x double> %shuffle
@@ -147,10 +126,9 @@ define <8 x double> @shuffle_v8f64_81a3c5e7(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_08080808(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08080808:
; ALL: # BB#0:
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vbroadcastsd %xmm1, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2pd %zmm1, %zmm0, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
ret <8 x double> %shuffle
@@ -159,15 +137,9 @@ define <8 x double> @shuffle_v8f64_08080808(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_08084c4c(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08084c4c:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vbroadcastsd %xmm3, %ymm3
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3]
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vbroadcastsd %xmm1, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2pd %zmm1, %zmm0, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
ret <8 x double> %shuffle
@@ -176,13 +148,9 @@ define <8 x double> @shuffle_v8f64_08084c4c(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_8823cc67(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_8823cc67:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vbroadcastsd %xmm3, %ymm3
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3]
-; ALL-NEXT: vbroadcastsd %xmm1, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2pd %zmm0, %zmm1, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
ret <8 x double> %shuffle
@@ -191,13 +159,9 @@ define <8 x double> @shuffle_v8f64_8823cc67(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_9832dc76(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_9832dc76:
; ALL: # BB#0:
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm1[0,1],ymm0[2,3]
-; ALL-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[1,0,3,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2pd %zmm0, %zmm1, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
ret <8 x double> %shuffle
@@ -206,13 +170,9 @@ define <8 x double> @shuffle_v8f64_9832dc76(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_9810dc54(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_9810dc54:
; ALL: # BB#0:
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm2
-; ALL-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[1,0,3,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm1
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2pd %zmm0, %zmm1, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
ret <8 x double> %shuffle
@@ -221,15 +181,9 @@ define <8 x double> @shuffle_v8f64_9810dc54(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_08194c5d(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08194c5d:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,0,2,1]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3
-; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,1,1,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2pd %zmm1, %zmm0, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
ret <8 x double> %shuffle
@@ -238,15 +192,9 @@ define <8 x double> @shuffle_v8f64_08194c5d(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_2a3b6e7f(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_2a3b6e7f:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,2,2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3
-; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[2,1,3,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2pd %zmm1, %zmm0, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
ret <8 x double> %shuffle
@@ -255,13 +203,9 @@ define <8 x double> @shuffle_v8f64_2a3b6e7f(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_08192a3b(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08192a3b:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm1[0,2,2,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[2,1,3,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2pd %zmm1, %zmm0, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x double> %shuffle
@@ -270,11 +214,9 @@ define <8 x double> @shuffle_v8f64_08192a3b(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08991abb:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm1[0,0,1,1]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm0[0],ymm2[1,2,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,2,3,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2pd %zmm0, %zmm1, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
ret <8 x double> %shuffle
@@ -283,12 +225,9 @@ define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_091b2d3f:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[2,1,3,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2pd %zmm1, %zmm0, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
ret <8 x double> %shuffle
@@ -297,11 +236,9 @@ define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_09ab1def(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_09ab1def:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermilpd {{.*#+}} ymm3 = ymm0[1,0,2,2]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1,2,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2pd %zmm0, %zmm1, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
ret <8 x double> %shuffle
@@ -310,10 +247,7 @@ define <8 x double> @shuffle_v8f64_09ab1def(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00014445(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00014445:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,1]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermpd $64, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
ret <8 x double> %shuffle
@@ -322,10 +256,7 @@ define <8 x double> @shuffle_v8f64_00014445(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00204464(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00204464:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,2,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermpd $32, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
ret <8 x double> %shuffle
@@ -334,10 +265,7 @@ define <8 x double> @shuffle_v8f64_00204464(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_03004744(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_03004744:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,3,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermpd $12, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
ret <8 x double> %shuffle
@@ -346,10 +274,7 @@ define <8 x double> @shuffle_v8f64_03004744(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_10005444(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10005444:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermpd $1, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
ret <8 x double> %shuffle
@@ -358,10 +283,7 @@ define <8 x double> @shuffle_v8f64_10005444(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_22006644(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_22006644:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[2,2,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermpd $10, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
ret <8 x double> %shuffle
@@ -370,10 +292,7 @@ define <8 x double> @shuffle_v8f64_22006644(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_33307774(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_33307774:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[3,3,3,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermpd $63, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
ret <8 x double> %shuffle
@@ -382,10 +301,7 @@ define <8 x double> @shuffle_v8f64_33307774(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_32107654(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_32107654:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[3,2,1,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermpd $27, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x double> %shuffle
@@ -394,10 +310,7 @@ define <8 x double> @shuffle_v8f64_32107654(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00234467:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $136, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
ret <8 x double> %shuffle
@@ -406,10 +319,7 @@ define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00224466:
; ALL: # BB#0:
-; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $0, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
ret <8 x double> %shuffle
@@ -418,10 +328,7 @@ define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_10325476(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10325476:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $85, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x double> %shuffle
@@ -430,10 +337,7 @@ define <8 x double> @shuffle_v8f64_10325476(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_11335577(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_11335577:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $255, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
ret <8 x double> %shuffle
@@ -442,10 +346,7 @@ define <8 x double> @shuffle_v8f64_11335577(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_10235467(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10235467:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $153, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
ret <8 x double> %shuffle
@@ -454,10 +355,7 @@ define <8 x double> @shuffle_v8f64_10235467(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_10225466(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10225466:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,2,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $17, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
ret <8 x double> %shuffle
@@ -466,10 +364,8 @@ define <8 x double> @shuffle_v8f64_10225466(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00015444(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00015444:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,1]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
ret <8 x double> %shuffle
@@ -478,10 +374,8 @@ define <8 x double> @shuffle_v8f64_00015444(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00204644(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00204644:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,2,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
ret <8 x double> %shuffle
@@ -490,10 +384,8 @@ define <8 x double> @shuffle_v8f64_00204644(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_03004474(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_03004474:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,3,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,3,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
ret <8 x double> %shuffle
@@ -502,10 +394,8 @@ define <8 x double> @shuffle_v8f64_03004474(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_10004444(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10004444:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x double> %shuffle
@@ -514,10 +404,8 @@ define <8 x double> @shuffle_v8f64_10004444(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_22006446(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_22006446:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[2,2,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,0,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
ret <8 x double> %shuffle
@@ -526,10 +414,8 @@ define <8 x double> @shuffle_v8f64_22006446(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_33307474(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_33307474:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[3,3,3,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,0,3,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
ret <8 x double> %shuffle
@@ -538,9 +424,8 @@ define <8 x double> @shuffle_v8f64_33307474(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_32104567(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_32104567:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[3,2,1,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
ret <8 x double> %shuffle
@@ -549,10 +434,8 @@ define <8 x double> @shuffle_v8f64_32104567(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00236744:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
ret <8 x double> %shuffle
@@ -561,10 +444,8 @@ define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00226644:
; ALL: # BB#0:
-; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
ret <8 x double> %shuffle
@@ -573,9 +454,7 @@ define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_10324567(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10324567:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $165, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
ret <8 x double> %shuffle
@@ -584,9 +463,7 @@ define <8 x double> @shuffle_v8f64_10324567(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_11334567(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_11334567:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $175, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x double> %shuffle
@@ -595,9 +472,7 @@ define <8 x double> @shuffle_v8f64_11334567(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_01235467(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_01235467:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: vpermilpd $154, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
ret <8 x double> %shuffle
@@ -606,9 +481,7 @@ define <8 x double> @shuffle_v8f64_01235467(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_01235466:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: vpermilpd $26, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
ret <8 x double> %shuffle
@@ -617,10 +490,8 @@ define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_002u6u44:
; ALL: # BB#0:
-; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
ret <8 x double> %shuffle
@@ -629,10 +500,8 @@ define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00uu66uu:
; ALL: # BB#0:
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
ret <8 x double> %shuffle
@@ -641,9 +510,7 @@ define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_103245uu(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_103245uu:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $37, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
ret <8 x double> %shuffle
@@ -652,9 +519,7 @@ define <8 x double> @shuffle_v8f64_103245uu(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_1133uu67(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_1133uu67:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $143, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
ret <8 x double> %shuffle
@@ -663,9 +528,7 @@ define <8 x double> @shuffle_v8f64_1133uu67(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_0uu354uu:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: vpermilpd $24, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
ret <8 x double> %shuffle
@@ -674,9 +537,7 @@ define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_uuu3uu66:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: vpermilpd $8, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
ret <8 x double> %shuffle
@@ -685,16 +546,9 @@ define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_c348cda0(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_c348cda0:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm2[0,1]
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vbroadcastsd %xmm1, %ymm4
-; ALL-NEXT: vblendpd {{.*#+}} ymm4 = ymm3[0,1,2],ymm4[3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm4[0],ymm2[1,2],ymm4[3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm3[0,1],ymm1[2],ymm3[3]
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2pd %zmm0, %zmm1, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
ret <8 x double> %shuffle
@@ -703,17 +557,9 @@ define <8 x double> @shuffle_v8f64_c348cda0(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_f511235a:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vblendpd {{.*#+}} ymm3 = ymm0[0],ymm2[1],ymm0[2,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[2,3,1,3]
-; ALL-NEXT: vmovddup {{.*#+}} ymm4 = ymm1[0,0,2,2]
-; ALL-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1,2],ymm4[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,1]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm1
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2pd %zmm1, %zmm0, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
ret <8 x double> %shuffle
@@ -731,9 +577,8 @@ define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00000010:
; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
ret <8 x i64> %shuffle
@@ -742,9 +587,8 @@ define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00000200(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00000200:
; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
ret <8 x i64> %shuffle
@@ -753,9 +597,8 @@ define <8 x i64> @shuffle_v8i64_00000200(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00003000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00003000:
; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
ret <8 x i64> %shuffle
@@ -764,11 +607,8 @@ define <8 x i64> @shuffle_v8i64_00003000(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00040000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00040000:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpbroadcastq %xmm1, %ymm1
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
ret <8 x i64> %shuffle
@@ -777,11 +617,8 @@ define <8 x i64> @shuffle_v8i64_00040000(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00500000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00500000:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,0]
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i64> %shuffle
@@ -790,11 +627,8 @@ define <8 x i64> @shuffle_v8i64_00500000(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_06000000:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,0,0]
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i64> %shuffle
@@ -803,11 +637,11 @@ define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_70000000:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,0,0,0]
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; ALL-NEXT: movl $7, %eax
+; ALL-NEXT: vpinsrq $0, %rax, %xmm1, %xmm2
+; ALL-NEXT: vinserti32x4 $0, %xmm2, %zmm1, %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i64> %shuffle
@@ -816,10 +650,7 @@ define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_01014545:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
-; ALL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: vpermq $68, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
ret <8 x i64> %shuffle
@@ -828,9 +659,8 @@ define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00112233(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00112233:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,1,1]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,3,3]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
ret <8 x i64> %shuffle
@@ -839,9 +669,8 @@ define <8 x i64> @shuffle_v8i64_00112233(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00001111(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00001111:
; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,1,1]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
ret <8 x i64> %shuffle
@@ -850,11 +679,7 @@ define <8 x i64> @shuffle_v8i64_00001111(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_81a3c5e7:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vshufpd $170, %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
ret <8 x i64> %shuffle
@@ -863,10 +688,9 @@ define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08080808:
; ALL: # BB#0:
-; ALL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vpbroadcastq %xmm1, %ymm1
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2q %zmm1, %zmm0, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
ret <8 x i64> %shuffle
@@ -875,15 +699,9 @@ define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_08084c4c(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08084c4c:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vinserti128 $1, %xmm2, %ymm2, %ymm2
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vpbroadcastq %xmm3, %ymm3
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2,3],ymm2[4,5],ymm3[6,7]
-; ALL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vpbroadcastq %xmm1, %ymm1
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2q %zmm1, %zmm0, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
ret <8 x i64> %shuffle
@@ -892,13 +710,9 @@ define <8 x i64> @shuffle_v8i64_08084c4c(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_8823cc67(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_8823cc67:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vpbroadcastq %xmm3, %ymm3
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
-; ALL-NEXT: vpbroadcastq %xmm1, %ymm1
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
ret <8 x i64> %shuffle
@@ -907,13 +721,9 @@ define <8 x i64> @shuffle_v8i64_8823cc67(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_9832dc76(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_9832dc76:
; ALL: # BB#0:
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; ALL-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
ret <8 x i64> %shuffle
@@ -922,13 +732,9 @@ define <8 x i64> @shuffle_v8i64_9832dc76(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_9810dc54(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_9810dc54:
; ALL: # BB#0:
-; ALL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm2
-; ALL-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
ret <8 x i64> %shuffle
@@ -937,15 +743,9 @@ define <8 x i64> @shuffle_v8i64_9810dc54(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_08194c5d(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08194c5d:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,0,2,1]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; ALL-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,1,1,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2q %zmm1, %zmm0, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
ret <8 x i64> %shuffle
@@ -954,15 +754,9 @@ define <8 x i64> @shuffle_v8i64_08194c5d(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_2a3b6e7f(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_2a3b6e7f:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; ALL-NEXT: vpermq {{.*#+}} ymm3 = ymm3[2,1,3,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2q %zmm1, %zmm0, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
ret <8 x i64> %shuffle
@@ -971,13 +765,9 @@ define <8 x i64> @shuffle_v8i64_2a3b6e7f(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_08192a3b(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08192a3b:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm1[0,2,2,3]
-; ALL-NEXT: vpermq {{.*#+}} ymm3 = ymm0[2,1,3,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2q %zmm1, %zmm0, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x i64> %shuffle
@@ -986,11 +776,9 @@ define <8 x i64> @shuffle_v8i64_08192a3b(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_08991abb(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08991abb:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm1[0,0,1,1]
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm0[0,1],ymm2[2,3,4,5,6,7]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,3]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
ret <8 x i64> %shuffle
@@ -999,12 +787,9 @@ define <8 x i64> @shuffle_v8i64_08991abb(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_091b2d3f(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_091b2d3f:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermq {{.*#+}} ymm3 = ymm0[2,1,3,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2q %zmm1, %zmm0, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
ret <8 x i64> %shuffle
@@ -1013,11 +798,9 @@ define <8 x i64> @shuffle_v8i64_091b2d3f(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_09ab1def(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_09ab1def:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7]
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3,4,5,6,7]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
ret <8 x i64> %shuffle
@@ -1026,10 +809,7 @@ define <8 x i64> @shuffle_v8i64_09ab1def(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00014445(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00014445:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,0,1]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermq $64, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
ret <8 x i64> %shuffle
@@ -1038,10 +818,7 @@ define <8 x i64> @shuffle_v8i64_00014445(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00204464(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00204464:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,2,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermq $32, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
ret <8 x i64> %shuffle
@@ -1050,10 +827,7 @@ define <8 x i64> @shuffle_v8i64_00204464(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_03004744(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_03004744:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,3,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermq $12, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
ret <8 x i64> %shuffle
@@ -1062,10 +836,7 @@ define <8 x i64> @shuffle_v8i64_03004744(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_10005444(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10005444:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermq $1, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
ret <8 x i64> %shuffle
@@ -1074,10 +845,7 @@ define <8 x i64> @shuffle_v8i64_10005444(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_22006644(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_22006644:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermq $10, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
ret <8 x i64> %shuffle
@@ -1086,10 +854,7 @@ define <8 x i64> @shuffle_v8i64_22006644(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_33307774(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_33307774:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[3,3,3,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermq $63, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
ret <8 x i64> %shuffle
@@ -1098,10 +863,7 @@ define <8 x i64> @shuffle_v8i64_33307774(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_32107654(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_32107654:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[3,2,1,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermq $27, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x i64> %shuffle
@@ -1110,10 +872,7 @@ define <8 x i64> @shuffle_v8i64_32107654(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00234467(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00234467:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,2,3]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,3]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $136, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
ret <8 x i64> %shuffle
@@ -1122,10 +881,7 @@ define <8 x i64> @shuffle_v8i64_00234467(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00224466(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00224466:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $0, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
ret <8 x i64> %shuffle
@@ -1134,10 +890,7 @@ define <8 x i64> @shuffle_v8i64_00224466(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_10325476(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10325476:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $85, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x i64> %shuffle
@@ -1146,10 +899,7 @@ define <8 x i64> @shuffle_v8i64_10325476(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_11335577(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_11335577:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $255, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
ret <8 x i64> %shuffle
@@ -1158,10 +908,7 @@ define <8 x i64> @shuffle_v8i64_11335577(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_10235467(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10235467:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[1,0,2,3]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,3]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $153, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
ret <8 x i64> %shuffle
@@ -1170,10 +917,7 @@ define <8 x i64> @shuffle_v8i64_10235467(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_10225466(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10225466:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[1,0,2,2]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,2]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $17, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
ret <8 x i64> %shuffle
@@ -1182,10 +926,8 @@ define <8 x i64> @shuffle_v8i64_10225466(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00015444(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00015444:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,0,1]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
ret <8 x i64> %shuffle
@@ -1194,10 +936,8 @@ define <8 x i64> @shuffle_v8i64_00015444(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00204644(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00204644:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,2,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
ret <8 x i64> %shuffle
@@ -1206,10 +946,8 @@ define <8 x i64> @shuffle_v8i64_00204644(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_03004474(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_03004474:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,3,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,3,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
ret <8 x i64> %shuffle
@@ -1218,10 +956,8 @@ define <8 x i64> @shuffle_v8i64_03004474(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_10004444(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10004444:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i64> %shuffle
@@ -1230,10 +966,8 @@ define <8 x i64> @shuffle_v8i64_10004444(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_22006446(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_22006446:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,0,0,2]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
ret <8 x i64> %shuffle
@@ -1242,10 +976,8 @@ define <8 x i64> @shuffle_v8i64_22006446(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_33307474(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_33307474:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[3,3,3,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,3,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
ret <8 x i64> %shuffle
@@ -1254,9 +986,8 @@ define <8 x i64> @shuffle_v8i64_33307474(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_32104567(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_32104567:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[3,2,1,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
ret <8 x i64> %shuffle
@@ -1265,10 +996,8 @@ define <8 x i64> @shuffle_v8i64_32104567(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00236744(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00236744:
; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,2,3]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
ret <8 x i64> %shuffle
@@ -1277,10 +1006,8 @@ define <8 x i64> @shuffle_v8i64_00236744(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00226644(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00226644:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
ret <8 x i64> %shuffle
@@ -1289,9 +1016,7 @@ define <8 x i64> @shuffle_v8i64_00226644(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_10324567(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10324567:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $165, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
ret <8 x i64> %shuffle
@@ -1300,9 +1025,7 @@ define <8 x i64> @shuffle_v8i64_10324567(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_11334567(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_11334567:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $175, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i64> %shuffle
@@ -1311,9 +1034,7 @@ define <8 x i64> @shuffle_v8i64_11334567(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_01235467(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_01235467:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,0,2,3]
-; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: vpermilpd $154, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
ret <8 x i64> %shuffle
@@ -1322,9 +1043,7 @@ define <8 x i64> @shuffle_v8i64_01235467(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_01235466(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_01235466:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,0,2,2]
-; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: vpermilpd $26, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
ret <8 x i64> %shuffle
@@ -1333,10 +1052,8 @@ define <8 x i64> @shuffle_v8i64_01235466(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_002u6u44:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
ret <8 x i64> %shuffle
@@ -1345,10 +1062,8 @@ define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00uu66uu:
; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
ret <8 x i64> %shuffle
@@ -1357,9 +1072,7 @@ define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_103245uu(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_103245uu:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $37, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
ret <8 x i64> %shuffle
@@ -1368,9 +1081,7 @@ define <8 x i64> @shuffle_v8i64_103245uu(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_1133uu67(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_1133uu67:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilpd $143, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
ret <8 x i64> %shuffle
@@ -1379,9 +1090,7 @@ define <8 x i64> @shuffle_v8i64_1133uu67(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_0uu354uu(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_0uu354uu:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: vpermilpd $24, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
ret <8 x i64> %shuffle
@@ -1390,9 +1099,7 @@ define <8 x i64> @shuffle_v8i64_0uu354uu(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_uuu3uu66(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_uuu3uu66:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: vpermilpd $8, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
ret <8 x i64> %shuffle
@@ -1401,15 +1108,9 @@ define <8 x i64> @shuffle_v8i64_uuu3uu66(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_6caa87e5(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_6caa87e5:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpblendd {{.*#+}} ymm3 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6,7]
-; ALL-NEXT: vpblendd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2,3],ymm3[4,5],ymm0[6,7]
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
+; ALL-NEXT: vmovdqa64 {{.*}}(%rip), %zmm2
+; ALL-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
ret <8 x i64> %shuffle
diff --git a/test/CodeGen/X86/vector-tzcnt-128.ll b/test/CodeGen/X86/vector-tzcnt-128.ll
new file mode 100644
index 0000000..422fe052
--- /dev/null
+++ b/test/CodeGen/X86/vector-tzcnt-128.ll
@@ -0,0 +1,1788 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+target triple = "x86_64-unknown-unknown"
+define <2 x i64> @testv2i64(<2 x i64> %in) {
+; SSE2-LABEL: testv2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: bsfq %rax, %rax
+; SSE2-NEXT: movl $64, %ecx
+; SSE2-NEXT: cmoveq %rcx, %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: bsfq %rax, %rax
+; SSE2-NEXT: cmoveq %rcx, %rax
+; SSE2-NEXT: movd %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv2i64:
+; SSE3: # BB#0:
+; SSE3-NEXT: movd %xmm0, %rax
+; SSE3-NEXT: bsfq %rax, %rax
+; SSE3-NEXT: movl $64, %ecx
+; SSE3-NEXT: cmoveq %rcx, %rax
+; SSE3-NEXT: movd %rax, %xmm1
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE3-NEXT: movd %xmm0, %rax
+; SSE3-NEXT: bsfq %rax, %rax
+; SSE3-NEXT: cmoveq %rcx, %rax
+; SSE3-NEXT: movd %rax, %xmm0
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv2i64:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %xmm0, %rax
+; SSSE3-NEXT: bsfq %rax, %rax
+; SSSE3-NEXT: movl $64, %ecx
+; SSSE3-NEXT: cmoveq %rcx, %rax
+; SSSE3-NEXT: movd %rax, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSSE3-NEXT: movd %xmm0, %rax
+; SSSE3-NEXT: bsfq %rax, %rax
+; SSSE3-NEXT: cmoveq %rcx, %rax
+; SSSE3-NEXT: movd %rax, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrq $1, %xmm0, %rax
+; SSE41-NEXT: bsfq %rax, %rax
+; SSE41-NEXT: movl $64, %ecx
+; SSE41-NEXT: cmoveq %rcx, %rax
+; SSE41-NEXT: movd %rax, %xmm1
+; SSE41-NEXT: movd %xmm0, %rax
+; SSE41-NEXT: bsfq %rax, %rax
+; SSE41-NEXT: cmoveq %rcx, %rax
+; SSE41-NEXT: movd %rax, %xmm0
+; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: retq
+; AVX-LABEL: testv2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: bsfq %rax, %rax
+; AVX-NEXT: movl $64, %ecx
+; AVX-NEXT: cmoveq %rcx, %rax
+; AVX-NEXT: vmovq %rax, %xmm1
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: bsfq %rax, %rax
+; AVX-NEXT: cmoveq %rcx, %rax
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+ %out = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %in, i1 0)
+ ret <2 x i64> %out
+define <2 x i64> @testv2i64u(<2 x i64> %in) {
+; SSE2-LABEL: testv2i64u:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: bsfq %rax, %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: bsfq %rax, %rax
+; SSE2-NEXT: movd %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv2i64u:
+; SSE3: # BB#0:
+; SSE3-NEXT: movd %xmm0, %rax
+; SSE3-NEXT: bsfq %rax, %rax
+; SSE3-NEXT: movd %rax, %xmm1
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE3-NEXT: movd %xmm0, %rax
+; SSE3-NEXT: bsfq %rax, %rax
+; SSE3-NEXT: movd %rax, %xmm0
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv2i64u:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %xmm0, %rax
+; SSSE3-NEXT: bsfq %rax, %rax
+; SSSE3-NEXT: movd %rax, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSSE3-NEXT: movd %xmm0, %rax
+; SSSE3-NEXT: bsfq %rax, %rax
+; SSSE3-NEXT: movd %rax, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv2i64u:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrq $1, %xmm0, %rax
+; SSE41-NEXT: bsfq %rax, %rax
+; SSE41-NEXT: movd %rax, %xmm1
+; SSE41-NEXT: movd %xmm0, %rax
+; SSE41-NEXT: bsfq %rax, %rax
+; SSE41-NEXT: movd %rax, %xmm0
+; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: retq
+; AVX-LABEL: testv2i64u:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: bsfq %rax, %rax
+; AVX-NEXT: vmovq %rax, %xmm1
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: bsfq %rax, %rax
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+ %out = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %in, i1 -1)
+ ret <2 x i64> %out
+define <4 x i32> @testv4i32(<4 x i32> %in) {
+; SSE2-LABEL: testv4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: bsfl %eax, %eax
+; SSE2-NEXT: movl $32, %ecx
+; SSE2-NEXT: cmovel %ecx, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; SSE2-NEXT: movd %xmm2, %eax
+; SSE2-NEXT: bsfl %eax, %eax
+; SSE2-NEXT: cmovel %ecx, %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: bsfl %eax, %eax
+; SSE2-NEXT: cmovel %ecx, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: bsfl %eax, %eax
+; SSE2-NEXT: cmovel %ecx, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv4i32:
+; SSE3: # BB#0:
+; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; SSE3-NEXT: movd %xmm1, %eax
+; SSE3-NEXT: bsfl %eax, %eax
+; SSE3-NEXT: movl $32, %ecx
+; SSE3-NEXT: cmovel %ecx, %eax
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; SSE3-NEXT: movd %xmm2, %eax
+; SSE3-NEXT: bsfl %eax, %eax
+; SSE3-NEXT: cmovel %ecx, %eax
+; SSE3-NEXT: movd %eax, %xmm2
+; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: bsfl %eax, %eax
+; SSE3-NEXT: cmovel %ecx, %eax
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: bsfl %eax, %eax
+; SSE3-NEXT: cmovel %ecx, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv4i32:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; SSSE3-NEXT: movd %xmm1, %eax
+; SSSE3-NEXT: bsfl %eax, %eax
+; SSSE3-NEXT: movl $32, %ecx
+; SSSE3-NEXT: cmovel %ecx, %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; SSSE3-NEXT: movd %xmm2, %eax
+; SSSE3-NEXT: bsfl %eax, %eax
+; SSSE3-NEXT: cmovel %ecx, %eax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: movd %xmm0, %eax
+; SSSE3-NEXT: bsfl %eax, %eax
+; SSSE3-NEXT: cmovel %ecx, %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSSE3-NEXT: movd %xmm0, %eax
+; SSSE3-NEXT: bsfl %eax, %eax
+; SSSE3-NEXT: cmovel %ecx, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: movl $32, %ecx
+; SSE41-NEXT: cmovel %ecx, %eax
+; SSE41-NEXT: movd %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: movd %edx, %xmm1
+; SSE41-NEXT: pinsrd $1, %eax, %xmm1
+; SSE41-NEXT: pextrd $2, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: cmovel %ecx, %eax
+; SSE41-NEXT: pinsrd $2, %eax, %xmm1
+; SSE41-NEXT: pextrd $3, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: cmovel %ecx, %eax
+; SSE41-NEXT: pinsrd $3, %eax, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrd $1, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: movl $32, %ecx
+; AVX-NEXT: cmovel %ecx, %eax
+; AVX-NEXT: vmovd %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vmovd %edx, %xmm1
+; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrd $2, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: cmovel %ecx, %eax
+; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrd $3, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: cmovel %ecx, %eax
+; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %out = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %in, i1 0)
+ ret <4 x i32> %out
+define <4 x i32> @testv4i32u(<4 x i32> %in) {
+; SSE2-LABEL: testv4i32u:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: bsfl %eax, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; SSE2-NEXT: movd %xmm2, %eax
+; SSE2-NEXT: bsfl %eax, %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: bsfl %eax, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: bsfl %eax, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv4i32u:
+; SSE3: # BB#0:
+; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; SSE3-NEXT: movd %xmm1, %eax
+; SSE3-NEXT: bsfl %eax, %eax
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; SSE3-NEXT: movd %xmm2, %eax
+; SSE3-NEXT: bsfl %eax, %eax
+; SSE3-NEXT: movd %eax, %xmm2
+; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: bsfl %eax, %eax
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: bsfl %eax, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv4i32u:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; SSSE3-NEXT: movd %xmm1, %eax
+; SSSE3-NEXT: bsfl %eax, %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; SSSE3-NEXT: movd %xmm2, %eax
+; SSSE3-NEXT: bsfl %eax, %eax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: movd %xmm0, %eax
+; SSSE3-NEXT: bsfl %eax, %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSSE3-NEXT: movd %xmm0, %eax
+; SSSE3-NEXT: bsfl %eax, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv4i32u:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: movd %xmm0, %ecx
+; SSE41-NEXT: bsfl %ecx, %ecx
+; SSE41-NEXT: movd %ecx, %xmm1
+; SSE41-NEXT: pinsrd $1, %eax, %xmm1
+; SSE41-NEXT: pextrd $2, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrd $2, %eax, %xmm1
+; SSE41-NEXT: pextrd $3, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrd $3, %eax, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv4i32u:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrd $1, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vmovd %xmm0, %ecx
+; AVX-NEXT: bsfl %ecx, %ecx
+; AVX-NEXT: vmovd %ecx, %xmm1
+; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrd $2, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrd $3, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %out = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %in, i1 -1)
+ ret <4 x i32> %out
+define <8 x i16> @testv8i16(<8 x i16> %in) {
+; SSE2-LABEL: testv8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: pextrw $7, %xmm0, %eax
+; SSE2-NEXT: bsfw %ax, %cx
+; SSE2-NEXT: movw $16, %ax
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: pextrw $3, %xmm0, %ecx
+; SSE2-NEXT: bsfw %cx, %cx
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: pextrw $5, %xmm0, %ecx
+; SSE2-NEXT: bsfw %cx, %cx
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: movd %ecx, %xmm3
+; SSE2-NEXT: pextrw $1, %xmm0, %ecx
+; SSE2-NEXT: bsfw %cx, %cx
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: pextrw $6, %xmm0, %ecx
+; SSE2-NEXT: bsfw %cx, %cx
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: pextrw $2, %xmm0, %ecx
+; SSE2-NEXT: bsfw %cx, %cx
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: movd %ecx, %xmm3
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE2-NEXT: pextrw $4, %xmm0, %ecx
+; SSE2-NEXT: bsfw %cx, %cx
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: movd %xmm0, %ecx
+; SSE2-NEXT: bsfw %cx, %cx
+; SSE2-NEXT: cmovew %ax, %cx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv8i16:
+; SSE3: # BB#0:
+; SSE3-NEXT: pextrw $7, %xmm0, %eax
+; SSE3-NEXT: bsfw %ax, %cx
+; SSE3-NEXT: movw $16, %ax
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: movd %ecx, %xmm1
+; SSE3-NEXT: pextrw $3, %xmm0, %ecx
+; SSE3-NEXT: bsfw %cx, %cx
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: movd %ecx, %xmm2
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE3-NEXT: pextrw $5, %xmm0, %ecx
+; SSE3-NEXT: bsfw %cx, %cx
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: movd %ecx, %xmm3
+; SSE3-NEXT: pextrw $1, %xmm0, %ecx
+; SSE3-NEXT: bsfw %cx, %cx
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: movd %ecx, %xmm1
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE3-NEXT: pextrw $6, %xmm0, %ecx
+; SSE3-NEXT: bsfw %cx, %cx
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: movd %ecx, %xmm2
+; SSE3-NEXT: pextrw $2, %xmm0, %ecx
+; SSE3-NEXT: bsfw %cx, %cx
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: movd %ecx, %xmm3
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE3-NEXT: pextrw $4, %xmm0, %ecx
+; SSE3-NEXT: bsfw %cx, %cx
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: movd %ecx, %xmm2
+; SSE3-NEXT: movd %xmm0, %ecx
+; SSE3-NEXT: bsfw %cx, %cx
+; SSE3-NEXT: cmovew %ax, %cx
+; SSE3-NEXT: movd %ecx, %xmm0
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv8i16:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pextrw $7, %xmm0, %eax
+; SSSE3-NEXT: bsfw %ax, %cx
+; SSSE3-NEXT: movw $16, %ax
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: pextrw $3, %xmm0, %ecx
+; SSSE3-NEXT: bsfw %cx, %cx
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT: pextrw $5, %xmm0, %ecx
+; SSSE3-NEXT: bsfw %cx, %cx
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: movd %ecx, %xmm3
+; SSSE3-NEXT: pextrw $1, %xmm0, %ecx
+; SSSE3-NEXT: bsfw %cx, %cx
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: pextrw $6, %xmm0, %ecx
+; SSSE3-NEXT: bsfw %cx, %cx
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: pextrw $2, %xmm0, %ecx
+; SSSE3-NEXT: bsfw %cx, %cx
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: movd %ecx, %xmm3
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSSE3-NEXT: pextrw $4, %xmm0, %ecx
+; SSSE3-NEXT: bsfw %cx, %cx
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: movd %xmm0, %ecx
+; SSSE3-NEXT: bsfw %cx, %cx
+; SSSE3-NEXT: cmovew %ax, %cx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrw $1, %xmm0, %eax
+; SSE41-NEXT: bsfw %ax, %cx
+; SSE41-NEXT: movw $16, %ax
+; SSE41-NEXT: cmovew %ax, %cx
+; SSE41-NEXT: movd %xmm0, %edx
+; SSE41-NEXT: bsfw %dx, %dx
+; SSE41-NEXT: cmovew %ax, %dx
+; SSE41-NEXT: movd %edx, %xmm1
+; SSE41-NEXT: pinsrw $1, %ecx, %xmm1
+; SSE41-NEXT: pextrw $2, %xmm0, %ecx
+; SSE41-NEXT: bsfw %cx, %cx
+; SSE41-NEXT: cmovew %ax, %cx
+; SSE41-NEXT: pinsrw $2, %ecx, %xmm1
+; SSE41-NEXT: pextrw $3, %xmm0, %ecx
+; SSE41-NEXT: bsfw %cx, %cx
+; SSE41-NEXT: cmovew %ax, %cx
+; SSE41-NEXT: pinsrw $3, %ecx, %xmm1
+; SSE41-NEXT: pextrw $4, %xmm0, %ecx
+; SSE41-NEXT: bsfw %cx, %cx
+; SSE41-NEXT: cmovew %ax, %cx
+; SSE41-NEXT: pinsrw $4, %ecx, %xmm1
+; SSE41-NEXT: pextrw $5, %xmm0, %ecx
+; SSE41-NEXT: bsfw %cx, %cx
+; SSE41-NEXT: cmovew %ax, %cx
+; SSE41-NEXT: pinsrw $5, %ecx, %xmm1
+; SSE41-NEXT: pextrw $6, %xmm0, %ecx
+; SSE41-NEXT: bsfw %cx, %cx
+; SSE41-NEXT: cmovew %ax, %cx
+; SSE41-NEXT: pinsrw $6, %ecx, %xmm1
+; SSE41-NEXT: pextrw $7, %xmm0, %ecx
+; SSE41-NEXT: bsfw %cx, %cx
+; SSE41-NEXT: cmovew %ax, %cx
+; SSE41-NEXT: pinsrw $7, %ecx, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrw $1, %xmm0, %eax
+; AVX-NEXT: bsfw %ax, %cx
+; AVX-NEXT: movw $16, %ax
+; AVX-NEXT: cmovew %ax, %cx
+; AVX-NEXT: vmovd %xmm0, %edx
+; AVX-NEXT: bsfw %dx, %dx
+; AVX-NEXT: cmovew %ax, %dx
+; AVX-NEXT: vmovd %edx, %xmm1
+; AVX-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $2, %xmm0, %ecx
+; AVX-NEXT: bsfw %cx, %cx
+; AVX-NEXT: cmovew %ax, %cx
+; AVX-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $3, %xmm0, %ecx
+; AVX-NEXT: bsfw %cx, %cx
+; AVX-NEXT: cmovew %ax, %cx
+; AVX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $4, %xmm0, %ecx
+; AVX-NEXT: bsfw %cx, %cx
+; AVX-NEXT: cmovew %ax, %cx
+; AVX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $5, %xmm0, %ecx
+; AVX-NEXT: bsfw %cx, %cx
+; AVX-NEXT: cmovew %ax, %cx
+; AVX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $6, %xmm0, %ecx
+; AVX-NEXT: bsfw %cx, %cx
+; AVX-NEXT: cmovew %ax, %cx
+; AVX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $7, %xmm0, %ecx
+; AVX-NEXT: bsfw %cx, %cx
+; AVX-NEXT: cmovew %ax, %cx
+; AVX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %out = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %in, i1 0)
+ ret <8 x i16> %out
+define <8 x i16> @testv8i16u(<8 x i16> %in) {
+; SSE2-LABEL: testv8i16u:
+; SSE2: # BB#0:
+; SSE2-NEXT: pextrw $7, %xmm0, %eax
+; SSE2-NEXT: bsfw %ax, %ax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: pextrw $3, %xmm0, %eax
+; SSE2-NEXT: bsfw %ax, %ax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: pextrw $5, %xmm0, %eax
+; SSE2-NEXT: bsfw %ax, %ax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: pextrw $1, %xmm0, %eax
+; SSE2-NEXT: bsfw %ax, %ax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE2-NEXT: pextrw $6, %xmm0, %eax
+; SSE2-NEXT: bsfw %ax, %ax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: pextrw $2, %xmm0, %eax
+; SSE2-NEXT: bsfw %ax, %ax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: pextrw $4, %xmm0, %eax
+; SSE2-NEXT: bsfw %ax, %ax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: bsfw %ax, %ax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv8i16u:
+; SSE3: # BB#0:
+; SSE3-NEXT: pextrw $7, %xmm0, %eax
+; SSE3-NEXT: bsfw %ax, %ax
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: pextrw $3, %xmm0, %eax
+; SSE3-NEXT: bsfw %ax, %ax
+; SSE3-NEXT: movd %eax, %xmm2
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE3-NEXT: pextrw $5, %xmm0, %eax
+; SSE3-NEXT: bsfw %ax, %ax
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: pextrw $1, %xmm0, %eax
+; SSE3-NEXT: bsfw %ax, %ax
+; SSE3-NEXT: movd %eax, %xmm3
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE3-NEXT: pextrw $6, %xmm0, %eax
+; SSE3-NEXT: bsfw %ax, %ax
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: pextrw $2, %xmm0, %eax
+; SSE3-NEXT: bsfw %ax, %ax
+; SSE3-NEXT: movd %eax, %xmm2
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE3-NEXT: pextrw $4, %xmm0, %eax
+; SSE3-NEXT: bsfw %ax, %ax
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: bsfw %ax, %ax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv8i16u:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pextrw $7, %xmm0, %eax
+; SSSE3-NEXT: bsfw %ax, %ax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: pextrw $3, %xmm0, %eax
+; SSSE3-NEXT: bsfw %ax, %ax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT: pextrw $5, %xmm0, %eax
+; SSSE3-NEXT: bsfw %ax, %ax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: pextrw $1, %xmm0, %eax
+; SSSE3-NEXT: bsfw %ax, %ax
+; SSSE3-NEXT: movd %eax, %xmm3
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSSE3-NEXT: pextrw $6, %xmm0, %eax
+; SSSE3-NEXT: bsfw %ax, %ax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: pextrw $2, %xmm0, %eax
+; SSSE3-NEXT: bsfw %ax, %ax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT: pextrw $4, %xmm0, %eax
+; SSSE3-NEXT: bsfw %ax, %ax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: movd %xmm0, %eax
+; SSSE3-NEXT: bsfw %ax, %ax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv8i16u:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrw $1, %xmm0, %eax
+; SSE41-NEXT: bsfw %ax, %ax
+; SSE41-NEXT: movd %xmm0, %ecx
+; SSE41-NEXT: bsfw %cx, %cx
+; SSE41-NEXT: movd %ecx, %xmm1
+; SSE41-NEXT: pinsrw $1, %eax, %xmm1
+; SSE41-NEXT: pextrw $2, %xmm0, %eax
+; SSE41-NEXT: bsfw %ax, %ax
+; SSE41-NEXT: pinsrw $2, %eax, %xmm1
+; SSE41-NEXT: pextrw $3, %xmm0, %eax
+; SSE41-NEXT: bsfw %ax, %ax
+; SSE41-NEXT: pinsrw $3, %eax, %xmm1
+; SSE41-NEXT: pextrw $4, %xmm0, %eax
+; SSE41-NEXT: bsfw %ax, %ax
+; SSE41-NEXT: pinsrw $4, %eax, %xmm1
+; SSE41-NEXT: pextrw $5, %xmm0, %eax
+; SSE41-NEXT: bsfw %ax, %ax
+; SSE41-NEXT: pinsrw $5, %eax, %xmm1
+; SSE41-NEXT: pextrw $6, %xmm0, %eax
+; SSE41-NEXT: bsfw %ax, %ax
+; SSE41-NEXT: pinsrw $6, %eax, %xmm1
+; SSE41-NEXT: pextrw $7, %xmm0, %eax
+; SSE41-NEXT: bsfw %ax, %ax
+; SSE41-NEXT: pinsrw $7, %eax, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv8i16u:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrw $1, %xmm0, %eax
+; AVX-NEXT: bsfw %ax, %ax
+; AVX-NEXT: vmovd %xmm0, %ecx
+; AVX-NEXT: bsfw %cx, %cx
+; AVX-NEXT: vmovd %ecx, %xmm1
+; AVX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $2, %xmm0, %eax
+; AVX-NEXT: bsfw %ax, %ax
+; AVX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $3, %xmm0, %eax
+; AVX-NEXT: bsfw %ax, %ax
+; AVX-NEXT: vpinsrw $3, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $4, %xmm0, %eax
+; AVX-NEXT: bsfw %ax, %ax
+; AVX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $5, %xmm0, %eax
+; AVX-NEXT: bsfw %ax, %ax
+; AVX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $6, %xmm0, %eax
+; AVX-NEXT: bsfw %ax, %ax
+; AVX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $7, %xmm0, %eax
+; AVX-NEXT: bsfw %ax, %ax
+; AVX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %out = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %in, i1 -1)
+ ret <8 x i16> %out
+define <16 x i8> @testv16i8(<16 x i8> %in) {
+; SSE2-LABEL: testv16i8:
+; SSE2: # BB#0:
+; SSE2: pushq %rbp
+; SSE2: pushq %r14
+; SSE2: pushq %rbx
+; SSE2: movaps %xmm0, -16(%rsp)
+; SSE2-NEXT: movzbl -1(%rsp), %eax
+; SSE2-NEXT: bsfl %eax, %edx
+; SSE2-NEXT: movl $32, %eax
+; SSE2-NEXT: cmovel %eax, %edx
+; SSE2-NEXT: cmpl $32, %edx
+; SSE2-NEXT: movl $8, %ecx
+; SSE2-NEXT: cmovel %ecx, %edx
+; SSE2-NEXT: movd %edx, %xmm0
+; SSE2-NEXT: movzbl -2(%rsp), %r14d
+; SSE2-NEXT: movzbl -3(%rsp), %ebx
+; SSE2-NEXT: movzbl -4(%rsp), %r9d
+; SSE2-NEXT: movzbl -5(%rsp), %edi
+; SSE2-NEXT: movzbl -6(%rsp), %r11d
+; SSE2-NEXT: movzbl -7(%rsp), %edx
+; SSE2-NEXT: movzbl -8(%rsp), %r8d
+; SSE2-NEXT: movzbl -9(%rsp), %esi
+; SSE2-NEXT: bsfl %esi, %esi
+; SSE2-NEXT: cmovel %eax, %esi
+; SSE2-NEXT: cmpl $32, %esi
+; SSE2-NEXT: cmovel %ecx, %esi
+; SSE2-NEXT: movd %esi, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: bsfl %edi, %esi
+; SSE2-NEXT: cmovel %eax, %esi
+; SSE2-NEXT: cmpl $32, %esi
+; SSE2-NEXT: cmovel %ecx, %esi
+; SSE2-NEXT: movd %esi, %xmm2
+; SSE2-NEXT: movzbl -10(%rsp), %edi
+; SSE2-NEXT: movzbl -11(%rsp), %esi
+; SSE2-NEXT: movzbl -12(%rsp), %r10d
+; SSE2-NEXT: movzbl -13(%rsp), %ebp
+; SSE2-NEXT: bsfl %ebp, %ebp
+; SSE2-NEXT: cmovel %eax, %ebp
+; SSE2-NEXT: cmpl $32, %ebp
+; SSE2-NEXT: cmovel %ecx, %ebp
+; SSE2-NEXT: movd %ebp, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: bsfl %ebx, %ebx
+; SSE2-NEXT: cmovel %eax, %ebx
+; SSE2-NEXT: cmpl $32, %ebx
+; SSE2-NEXT: cmovel %ecx, %ebx
+; SSE2-NEXT: movd %ebx, %xmm1
+; SSE2-NEXT: bsfl %esi, %esi
+; SSE2-NEXT: cmovel %eax, %esi
+; SSE2-NEXT: cmpl $32, %esi
+; SSE2-NEXT: cmovel %ecx, %esi
+; SSE2-NEXT: movd %esi, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: bsfl %edx, %edx
+; SSE2-NEXT: cmovel %eax, %edx
+; SSE2-NEXT: cmpl $32, %edx
+; SSE2-NEXT: cmovel %ecx, %edx
+; SSE2-NEXT: movd %edx, %xmm3
+; SSE2-NEXT: movzbl -14(%rsp), %edx
+; SSE2-NEXT: movzbl -15(%rsp), %esi
+; SSE2-NEXT: bsfl %esi, %esi
+; SSE2-NEXT: cmovel %eax, %esi
+; SSE2-NEXT: cmpl $32, %esi
+; SSE2-NEXT: cmovel %ecx, %esi
+; SSE2-NEXT: movd %esi, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: bsfl %r14d, %esi
+; SSE2-NEXT: cmovel %eax, %esi
+; SSE2-NEXT: cmpl $32, %esi
+; SSE2-NEXT: cmovel %ecx, %esi
+; SSE2-NEXT: movd %esi, %xmm0
+; SSE2-NEXT: bsfl %edi, %esi
+; SSE2-NEXT: cmovel %eax, %esi
+; SSE2-NEXT: cmpl $32, %esi
+; SSE2-NEXT: cmovel %ecx, %esi
+; SSE2-NEXT: movd %esi, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE2-NEXT: bsfl %r11d, %esi
+; SSE2-NEXT: cmovel %eax, %esi
+; SSE2-NEXT: cmpl $32, %esi
+; SSE2-NEXT: cmovel %ecx, %esi
+; SSE2-NEXT: movd %esi, %xmm0
+; SSE2-NEXT: bsfl %edx, %edx
+; SSE2-NEXT: cmovel %eax, %edx
+; SSE2-NEXT: cmpl $32, %edx
+; SSE2-NEXT: cmovel %ecx, %edx
+; SSE2-NEXT: movd %edx, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSE2-NEXT: bsfl %r9d, %edx
+; SSE2-NEXT: cmovel %eax, %edx
+; SSE2-NEXT: cmpl $32, %edx
+; SSE2-NEXT: cmovel %ecx, %edx
+; SSE2-NEXT: movd %edx, %xmm0
+; SSE2-NEXT: bsfl %r10d, %edx
+; SSE2-NEXT: cmovel %eax, %edx
+; SSE2-NEXT: cmpl $32, %edx
+; SSE2-NEXT: cmovel %ecx, %edx
+; SSE2-NEXT: movd %edx, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE2-NEXT: bsfl %r8d, %edx
+; SSE2-NEXT: cmovel %eax, %edx
+; SSE2-NEXT: cmpl $32, %edx
+; SSE2-NEXT: cmovel %ecx, %edx
+; SSE2-NEXT: movd %edx, %xmm4
+; SSE2-NEXT: movzbl -16(%rsp), %edx
+; SSE2-NEXT: bsfl %edx, %edx
+; SSE2-NEXT: cmovel %eax, %edx
+; SSE2-NEXT: cmpl $32, %edx
+; SSE2-NEXT: cmovel %ecx, %edx
+; SSE2-NEXT: movd %edx, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: popq %rbx
+; SSE2-NEXT: popq %r14
+; SSE2-NEXT: popq %rbp
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv16i8:
+; SSE3: # BB#0:
+; SSE3: pushq %rbp
+; SSE3: pushq %r14
+; SSE3: pushq %rbx
+; SSE3: movaps %xmm0, -16(%rsp)
+; SSE3-NEXT: movzbl -1(%rsp), %eax
+; SSE3-NEXT: bsfl %eax, %edx
+; SSE3-NEXT: movl $32, %eax
+; SSE3-NEXT: cmovel %eax, %edx
+; SSE3-NEXT: cmpl $32, %edx
+; SSE3-NEXT: movl $8, %ecx
+; SSE3-NEXT: cmovel %ecx, %edx
+; SSE3-NEXT: movd %edx, %xmm0
+; SSE3-NEXT: movzbl -2(%rsp), %r14d
+; SSE3-NEXT: movzbl -3(%rsp), %ebx
+; SSE3-NEXT: movzbl -4(%rsp), %r9d
+; SSE3-NEXT: movzbl -5(%rsp), %edi
+; SSE3-NEXT: movzbl -6(%rsp), %r11d
+; SSE3-NEXT: movzbl -7(%rsp), %edx
+; SSE3-NEXT: movzbl -8(%rsp), %r8d
+; SSE3-NEXT: movzbl -9(%rsp), %esi
+; SSE3-NEXT: bsfl %esi, %esi
+; SSE3-NEXT: cmovel %eax, %esi
+; SSE3-NEXT: cmpl $32, %esi
+; SSE3-NEXT: cmovel %ecx, %esi
+; SSE3-NEXT: movd %esi, %xmm1
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE3-NEXT: bsfl %edi, %esi
+; SSE3-NEXT: cmovel %eax, %esi
+; SSE3-NEXT: cmpl $32, %esi
+; SSE3-NEXT: cmovel %ecx, %esi
+; SSE3-NEXT: movd %esi, %xmm2
+; SSE3-NEXT: movzbl -10(%rsp), %edi
+; SSE3-NEXT: movzbl -11(%rsp), %esi
+; SSE3-NEXT: movzbl -12(%rsp), %r10d
+; SSE3-NEXT: movzbl -13(%rsp), %ebp
+; SSE3-NEXT: bsfl %ebp, %ebp
+; SSE3-NEXT: cmovel %eax, %ebp
+; SSE3-NEXT: cmpl $32, %ebp
+; SSE3-NEXT: cmovel %ecx, %ebp
+; SSE3-NEXT: movd %ebp, %xmm0
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE3-NEXT: bsfl %ebx, %ebx
+; SSE3-NEXT: cmovel %eax, %ebx
+; SSE3-NEXT: cmpl $32, %ebx
+; SSE3-NEXT: cmovel %ecx, %ebx
+; SSE3-NEXT: movd %ebx, %xmm1
+; SSE3-NEXT: bsfl %esi, %esi
+; SSE3-NEXT: cmovel %eax, %esi
+; SSE3-NEXT: cmpl $32, %esi
+; SSE3-NEXT: cmovel %ecx, %esi
+; SSE3-NEXT: movd %esi, %xmm2
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE3-NEXT: bsfl %edx, %edx
+; SSE3-NEXT: cmovel %eax, %edx
+; SSE3-NEXT: cmpl $32, %edx
+; SSE3-NEXT: cmovel %ecx, %edx
+; SSE3-NEXT: movd %edx, %xmm3
+; SSE3-NEXT: movzbl -14(%rsp), %edx
+; SSE3-NEXT: movzbl -15(%rsp), %esi
+; SSE3-NEXT: bsfl %esi, %esi
+; SSE3-NEXT: cmovel %eax, %esi
+; SSE3-NEXT: cmpl $32, %esi
+; SSE3-NEXT: cmovel %ecx, %esi
+; SSE3-NEXT: movd %esi, %xmm1
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE3-NEXT: bsfl %r14d, %esi
+; SSE3-NEXT: cmovel %eax, %esi
+; SSE3-NEXT: cmpl $32, %esi
+; SSE3-NEXT: cmovel %ecx, %esi
+; SSE3-NEXT: movd %esi, %xmm0
+; SSE3-NEXT: bsfl %edi, %esi
+; SSE3-NEXT: cmovel %eax, %esi
+; SSE3-NEXT: cmpl $32, %esi
+; SSE3-NEXT: cmovel %ecx, %esi
+; SSE3-NEXT: movd %esi, %xmm3
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE3-NEXT: bsfl %r11d, %esi
+; SSE3-NEXT: cmovel %eax, %esi
+; SSE3-NEXT: cmpl $32, %esi
+; SSE3-NEXT: cmovel %ecx, %esi
+; SSE3-NEXT: movd %esi, %xmm0
+; SSE3-NEXT: bsfl %edx, %edx
+; SSE3-NEXT: cmovel %eax, %edx
+; SSE3-NEXT: cmpl $32, %edx
+; SSE3-NEXT: cmovel %ecx, %edx
+; SSE3-NEXT: movd %edx, %xmm2
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSE3-NEXT: bsfl %r9d, %edx
+; SSE3-NEXT: cmovel %eax, %edx
+; SSE3-NEXT: cmpl $32, %edx
+; SSE3-NEXT: cmovel %ecx, %edx
+; SSE3-NEXT: movd %edx, %xmm0
+; SSE3-NEXT: bsfl %r10d, %edx
+; SSE3-NEXT: cmovel %eax, %edx
+; SSE3-NEXT: cmpl $32, %edx
+; SSE3-NEXT: cmovel %ecx, %edx
+; SSE3-NEXT: movd %edx, %xmm3
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE3-NEXT: bsfl %r8d, %edx
+; SSE3-NEXT: cmovel %eax, %edx
+; SSE3-NEXT: cmpl $32, %edx
+; SSE3-NEXT: cmovel %ecx, %edx
+; SSE3-NEXT: movd %edx, %xmm4
+; SSE3-NEXT: movzbl -16(%rsp), %edx
+; SSE3-NEXT: bsfl %edx, %edx
+; SSE3-NEXT: cmovel %eax, %edx
+; SSE3-NEXT: cmpl $32, %edx
+; SSE3-NEXT: cmovel %ecx, %edx
+; SSE3-NEXT: movd %edx, %xmm0
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE3-NEXT: popq %rbx
+; SSE3-NEXT: popq %r14
+; SSE3-NEXT: popq %rbp
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv16i8:
+; SSSE3: # BB#0:
+; SSSE3: pushq %rbp
+; SSSE3: pushq %r14
+; SSSE3: pushq %rbx
+; SSSE3: movaps %xmm0, -16(%rsp)
+; SSSE3-NEXT: movzbl -1(%rsp), %eax
+; SSSE3-NEXT: bsfl %eax, %edx
+; SSSE3-NEXT: movl $32, %eax
+; SSSE3-NEXT: cmovel %eax, %edx
+; SSSE3-NEXT: cmpl $32, %edx
+; SSSE3-NEXT: movl $8, %ecx
+; SSSE3-NEXT: cmovel %ecx, %edx
+; SSSE3-NEXT: movd %edx, %xmm0
+; SSSE3-NEXT: movzbl -2(%rsp), %r14d
+; SSSE3-NEXT: movzbl -3(%rsp), %ebx
+; SSSE3-NEXT: movzbl -4(%rsp), %r9d
+; SSSE3-NEXT: movzbl -5(%rsp), %edi
+; SSSE3-NEXT: movzbl -6(%rsp), %r11d
+; SSSE3-NEXT: movzbl -7(%rsp), %edx
+; SSSE3-NEXT: movzbl -8(%rsp), %r8d
+; SSSE3-NEXT: movzbl -9(%rsp), %esi
+; SSSE3-NEXT: bsfl %esi, %esi
+; SSSE3-NEXT: cmovel %eax, %esi
+; SSSE3-NEXT: cmpl $32, %esi
+; SSSE3-NEXT: cmovel %ecx, %esi
+; SSSE3-NEXT: movd %esi, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: bsfl %edi, %esi
+; SSSE3-NEXT: cmovel %eax, %esi
+; SSSE3-NEXT: cmpl $32, %esi
+; SSSE3-NEXT: cmovel %ecx, %esi
+; SSSE3-NEXT: movd %esi, %xmm2
+; SSSE3-NEXT: movzbl -10(%rsp), %edi
+; SSSE3-NEXT: movzbl -11(%rsp), %esi
+; SSSE3-NEXT: movzbl -12(%rsp), %r10d
+; SSSE3-NEXT: movzbl -13(%rsp), %ebp
+; SSSE3-NEXT: bsfl %ebp, %ebp
+; SSSE3-NEXT: cmovel %eax, %ebp
+; SSSE3-NEXT: cmpl $32, %ebp
+; SSSE3-NEXT: cmovel %ecx, %ebp
+; SSSE3-NEXT: movd %ebp, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT: bsfl %ebx, %ebx
+; SSSE3-NEXT: cmovel %eax, %ebx
+; SSSE3-NEXT: cmpl $32, %ebx
+; SSSE3-NEXT: cmovel %ecx, %ebx
+; SSSE3-NEXT: movd %ebx, %xmm1
+; SSSE3-NEXT: bsfl %esi, %esi
+; SSSE3-NEXT: cmovel %eax, %esi
+; SSSE3-NEXT: cmpl $32, %esi
+; SSSE3-NEXT: cmovel %ecx, %esi
+; SSSE3-NEXT: movd %esi, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSSE3-NEXT: bsfl %edx, %edx
+; SSSE3-NEXT: cmovel %eax, %edx
+; SSSE3-NEXT: cmpl $32, %edx
+; SSSE3-NEXT: cmovel %ecx, %edx
+; SSSE3-NEXT: movd %edx, %xmm3
+; SSSE3-NEXT: movzbl -14(%rsp), %edx
+; SSSE3-NEXT: movzbl -15(%rsp), %esi
+; SSSE3-NEXT: bsfl %esi, %esi
+; SSSE3-NEXT: cmovel %eax, %esi
+; SSSE3-NEXT: cmpl $32, %esi
+; SSSE3-NEXT: cmovel %ecx, %esi
+; SSSE3-NEXT: movd %esi, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: bsfl %r14d, %esi
+; SSSE3-NEXT: cmovel %eax, %esi
+; SSSE3-NEXT: cmpl $32, %esi
+; SSSE3-NEXT: cmovel %ecx, %esi
+; SSSE3-NEXT: movd %esi, %xmm0
+; SSSE3-NEXT: bsfl %edi, %esi
+; SSSE3-NEXT: cmovel %eax, %esi
+; SSSE3-NEXT: cmpl $32, %esi
+; SSSE3-NEXT: cmovel %ecx, %esi
+; SSSE3-NEXT: movd %esi, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSSE3-NEXT: bsfl %r11d, %esi
+; SSSE3-NEXT: cmovel %eax, %esi
+; SSSE3-NEXT: cmpl $32, %esi
+; SSSE3-NEXT: cmovel %ecx, %esi
+; SSSE3-NEXT: movd %esi, %xmm0
+; SSSE3-NEXT: bsfl %edx, %edx
+; SSSE3-NEXT: cmovel %eax, %edx
+; SSSE3-NEXT: cmpl $32, %edx
+; SSSE3-NEXT: cmovel %ecx, %edx
+; SSSE3-NEXT: movd %edx, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSSE3-NEXT: bsfl %r9d, %edx
+; SSSE3-NEXT: cmovel %eax, %edx
+; SSSE3-NEXT: cmpl $32, %edx
+; SSSE3-NEXT: cmovel %ecx, %edx
+; SSSE3-NEXT: movd %edx, %xmm0
+; SSSE3-NEXT: bsfl %r10d, %edx
+; SSSE3-NEXT: cmovel %eax, %edx
+; SSSE3-NEXT: cmpl $32, %edx
+; SSSE3-NEXT: cmovel %ecx, %edx
+; SSSE3-NEXT: movd %edx, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSSE3-NEXT: bsfl %r8d, %edx
+; SSSE3-NEXT: cmovel %eax, %edx
+; SSSE3-NEXT: cmpl $32, %edx
+; SSSE3-NEXT: cmovel %ecx, %edx
+; SSSE3-NEXT: movd %edx, %xmm4
+; SSSE3-NEXT: movzbl -16(%rsp), %edx
+; SSSE3-NEXT: bsfl %edx, %edx
+; SSSE3-NEXT: cmovel %eax, %edx
+; SSSE3-NEXT: cmpl $32, %edx
+; SSSE3-NEXT: cmovel %ecx, %edx
+; SSSE3-NEXT: movd %edx, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT: popq %rbx
+; SSSE3-NEXT: popq %r14
+; SSSE3-NEXT: popq %rbp
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv16i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrb $1, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %edx
+; SSE41-NEXT: movl $32, %eax
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: movl $8, %ecx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pextrb $0, %xmm0, %esi
+; SSE41-NEXT: bsfl %esi, %esi
+; SSE41-NEXT: cmovel %eax, %esi
+; SSE41-NEXT: cmpl $32, %esi
+; SSE41-NEXT: cmovel %ecx, %esi
+; SSE41-NEXT: movd %esi, %xmm1
+; SSE41-NEXT: pinsrb $1, %edx, %xmm1
+; SSE41-NEXT: pextrb $2, %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pinsrb $2, %edx, %xmm1
+; SSE41-NEXT: pextrb $3, %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pinsrb $3, %edx, %xmm1
+; SSE41-NEXT: pextrb $4, %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pinsrb $4, %edx, %xmm1
+; SSE41-NEXT: pextrb $5, %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pinsrb $5, %edx, %xmm1
+; SSE41-NEXT: pextrb $6, %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pinsrb $6, %edx, %xmm1
+; SSE41-NEXT: pextrb $7, %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pinsrb $7, %edx, %xmm1
+; SSE41-NEXT: pextrb $8, %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pinsrb $8, %edx, %xmm1
+; SSE41-NEXT: pextrb $9, %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pinsrb $9, %edx, %xmm1
+; SSE41-NEXT: pextrb $10, %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pinsrb $10, %edx, %xmm1
+; SSE41-NEXT: pextrb $11, %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pinsrb $11, %edx, %xmm1
+; SSE41-NEXT: pextrb $12, %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pinsrb $12, %edx, %xmm1
+; SSE41-NEXT: pextrb $13, %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pinsrb $13, %edx, %xmm1
+; SSE41-NEXT: pextrb $14, %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pinsrb $14, %edx, %xmm1
+; SSE41-NEXT: pextrb $15, %xmm0, %edx
+; SSE41-NEXT: bsfl %edx, %edx
+; SSE41-NEXT: cmovel %eax, %edx
+; SSE41-NEXT: cmpl $32, %edx
+; SSE41-NEXT: cmovel %ecx, %edx
+; SSE41-NEXT: pinsrb $15, %edx, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrb $1, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %edx
+; AVX-NEXT: movl $32, %eax
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: movl $8, %ecx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpextrb $0, %xmm0, %esi
+; AVX-NEXT: bsfl %esi, %esi
+; AVX-NEXT: cmovel %eax, %esi
+; AVX-NEXT: cmpl $32, %esi
+; AVX-NEXT: cmovel %ecx, %esi
+; AVX-NEXT: vmovd %esi, %xmm1
+; AVX-NEXT: vpinsrb $1, %edx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $2, %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $3, %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpinsrb $3, %edx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $4, %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $5, %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpinsrb $5, %edx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $6, %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $7, %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpinsrb $7, %edx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $8, %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpinsrb $8, %edx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $9, %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpinsrb $9, %edx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $10, %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpinsrb $10, %edx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $11, %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpinsrb $11, %edx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $12, %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpinsrb $12, %edx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $13, %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $14, %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpinsrb $14, %edx, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $15, %xmm0, %edx
+; AVX-NEXT: bsfl %edx, %edx
+; AVX-NEXT: cmovel %eax, %edx
+; AVX-NEXT: cmpl $32, %edx
+; AVX-NEXT: cmovel %ecx, %edx
+; AVX-NEXT: vpinsrb $15, %edx, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %out = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %in, i1 0)
+ ret <16 x i8> %out
+define <16 x i8> @testv16i8u(<16 x i8> %in) {
+; SSE2-LABEL: testv16i8u:
+; SSE2: # BB#0:
+; SSE2: pushq %rbx
+; SSE2: movaps %xmm0, -16(%rsp)
+; SSE2-NEXT: movzbl -1(%rsp), %eax
+; SSE2-NEXT: bsfl %eax, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: movzbl -2(%rsp), %r11d
+; SSE2-NEXT: movzbl -3(%rsp), %eax
+; SSE2-NEXT: movzbl -4(%rsp), %r9d
+; SSE2-NEXT: movzbl -5(%rsp), %edi
+; SSE2-NEXT: movzbl -6(%rsp), %r10d
+; SSE2-NEXT: movzbl -7(%rsp), %ecx
+; SSE2-NEXT: movzbl -8(%rsp), %r8d
+; SSE2-NEXT: movzbl -9(%rsp), %edx
+; SSE2-NEXT: bsfl %edx, %edx
+; SSE2-NEXT: movd %edx, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: bsfl %edi, %edx
+; SSE2-NEXT: movd %edx, %xmm0
+; SSE2-NEXT: movzbl -10(%rsp), %edx
+; SSE2-NEXT: movzbl -11(%rsp), %esi
+; SSE2-NEXT: movzbl -12(%rsp), %edi
+; SSE2-NEXT: movzbl -13(%rsp), %ebx
+; SSE2-NEXT: bsfl %ebx, %ebx
+; SSE2-NEXT: movd %ebx, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: bsfl %eax, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: bsfl %esi, %eax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpcklbw %xmm0, %xmm3 # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE2-NEXT: bsfl %ecx, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: movzbl -14(%rsp), %eax
+; SSE2-NEXT: movzbl -15(%rsp), %ecx
+; SSE2-NEXT: bsfl %ecx, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: bsfl %r11d, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: bsfl %edx, %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: bsfl %r10d, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: bsfl %eax, %eax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: bsfl %r9d, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: bsfl %edi, %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: bsfl %r8d, %eax
+; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: movzbl -16(%rsp), %eax
+; SSE2-NEXT: bsfl %eax, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: popq %rbx
+; SSE2-NEXT: retq
+; SSE3-LABEL: testv16i8u:
+; SSE3: # BB#0:
+; SSE3: pushq %rbx
+; SSE3: movaps %xmm0, -16(%rsp)
+; SSE3-NEXT: movzbl -1(%rsp), %eax
+; SSE3-NEXT: bsfl %eax, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: movzbl -2(%rsp), %r11d
+; SSE3-NEXT: movzbl -3(%rsp), %eax
+; SSE3-NEXT: movzbl -4(%rsp), %r9d
+; SSE3-NEXT: movzbl -5(%rsp), %edi
+; SSE3-NEXT: movzbl -6(%rsp), %r10d
+; SSE3-NEXT: movzbl -7(%rsp), %ecx
+; SSE3-NEXT: movzbl -8(%rsp), %r8d
+; SSE3-NEXT: movzbl -9(%rsp), %edx
+; SSE3-NEXT: bsfl %edx, %edx
+; SSE3-NEXT: movd %edx, %xmm1
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE3-NEXT: bsfl %edi, %edx
+; SSE3-NEXT: movd %edx, %xmm0
+; SSE3-NEXT: movzbl -10(%rsp), %edx
+; SSE3-NEXT: movzbl -11(%rsp), %esi
+; SSE3-NEXT: movzbl -12(%rsp), %edi
+; SSE3-NEXT: movzbl -13(%rsp), %ebx
+; SSE3-NEXT: bsfl %ebx, %ebx
+; SSE3-NEXT: movd %ebx, %xmm2
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE3-NEXT: bsfl %eax, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: bsfl %esi, %eax
+; SSE3-NEXT: movd %eax, %xmm3
+; SSE3-NEXT: punpcklbw %xmm0, %xmm3 # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE3-NEXT: bsfl %ecx, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: movzbl -14(%rsp), %eax
+; SSE3-NEXT: movzbl -15(%rsp), %ecx
+; SSE3-NEXT: bsfl %ecx, %ecx
+; SSE3-NEXT: movd %ecx, %xmm1
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE3-NEXT: bsfl %r11d, %ecx
+; SSE3-NEXT: movd %ecx, %xmm0
+; SSE3-NEXT: bsfl %edx, %ecx
+; SSE3-NEXT: movd %ecx, %xmm2
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE3-NEXT: bsfl %r10d, %ecx
+; SSE3-NEXT: movd %ecx, %xmm0
+; SSE3-NEXT: bsfl %eax, %eax
+; SSE3-NEXT: movd %eax, %xmm3
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE3-NEXT: bsfl %r9d, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: bsfl %edi, %eax
+; SSE3-NEXT: movd %eax, %xmm2
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE3-NEXT: bsfl %r8d, %eax
+; SSE3-NEXT: movd %eax, %xmm4
+; SSE3-NEXT: movzbl -16(%rsp), %eax
+; SSE3-NEXT: bsfl %eax, %eax
+; SSE3-NEXT: movd %eax, %xmm0
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE3-NEXT: popq %rbx
+; SSE3-NEXT: retq
+; SSSE3-LABEL: testv16i8u:
+; SSSE3: # BB#0:
+; SSSE3: pushq %rbx
+; SSSE3: movaps %xmm0, -16(%rsp)
+; SSSE3-NEXT: movzbl -1(%rsp), %eax
+; SSSE3-NEXT: bsfl %eax, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: movzbl -2(%rsp), %r11d
+; SSSE3-NEXT: movzbl -3(%rsp), %eax
+; SSSE3-NEXT: movzbl -4(%rsp), %r9d
+; SSSE3-NEXT: movzbl -5(%rsp), %edi
+; SSSE3-NEXT: movzbl -6(%rsp), %r10d
+; SSSE3-NEXT: movzbl -7(%rsp), %ecx
+; SSSE3-NEXT: movzbl -8(%rsp), %r8d
+; SSSE3-NEXT: movzbl -9(%rsp), %edx
+; SSSE3-NEXT: bsfl %edx, %edx
+; SSSE3-NEXT: movd %edx, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: bsfl %edi, %edx
+; SSSE3-NEXT: movd %edx, %xmm0
+; SSSE3-NEXT: movzbl -10(%rsp), %edx
+; SSSE3-NEXT: movzbl -11(%rsp), %esi
+; SSSE3-NEXT: movzbl -12(%rsp), %edi
+; SSSE3-NEXT: movzbl -13(%rsp), %ebx
+; SSSE3-NEXT: bsfl %ebx, %ebx
+; SSSE3-NEXT: movd %ebx, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSSE3-NEXT: bsfl %eax, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: bsfl %esi, %eax
+; SSSE3-NEXT: movd %eax, %xmm3
+; SSSE3-NEXT: punpcklbw %xmm0, %xmm3 # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSSE3-NEXT: bsfl %ecx, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: movzbl -14(%rsp), %eax
+; SSSE3-NEXT: movzbl -15(%rsp), %ecx
+; SSSE3-NEXT: bsfl %ecx, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: bsfl %r11d, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: bsfl %edx, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT: bsfl %r10d, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: bsfl %eax, %eax
+; SSSE3-NEXT: movd %eax, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSSE3-NEXT: bsfl %r9d, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: bsfl %edi, %eax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT: bsfl %r8d, %eax
+; SSSE3-NEXT: movd %eax, %xmm4
+; SSSE3-NEXT: movzbl -16(%rsp), %eax
+; SSSE3-NEXT: bsfl %eax, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT: popq %rbx
+; SSSE3-NEXT: retq
+; SSE41-LABEL: testv16i8u:
+; SSE41: # BB#0:
+; SSE41-NEXT: pextrb $1, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pextrb $0, %xmm0, %ecx
+; SSE41-NEXT: bsfl %ecx, %ecx
+; SSE41-NEXT: movd %ecx, %xmm1
+; SSE41-NEXT: pinsrb $1, %eax, %xmm1
+; SSE41-NEXT: pextrb $2, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrb $2, %eax, %xmm1
+; SSE41-NEXT: pextrb $3, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrb $3, %eax, %xmm1
+; SSE41-NEXT: pextrb $4, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrb $4, %eax, %xmm1
+; SSE41-NEXT: pextrb $5, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrb $5, %eax, %xmm1
+; SSE41-NEXT: pextrb $6, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrb $6, %eax, %xmm1
+; SSE41-NEXT: pextrb $7, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrb $7, %eax, %xmm1
+; SSE41-NEXT: pextrb $8, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrb $8, %eax, %xmm1
+; SSE41-NEXT: pextrb $9, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrb $9, %eax, %xmm1
+; SSE41-NEXT: pextrb $10, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrb $10, %eax, %xmm1
+; SSE41-NEXT: pextrb $11, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrb $11, %eax, %xmm1
+; SSE41-NEXT: pextrb $12, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrb $12, %eax, %xmm1
+; SSE41-NEXT: pextrb $13, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrb $13, %eax, %xmm1
+; SSE41-NEXT: pextrb $14, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrb $14, %eax, %xmm1
+; SSE41-NEXT: pextrb $15, %xmm0, %eax
+; SSE41-NEXT: bsfl %eax, %eax
+; SSE41-NEXT: pinsrb $15, %eax, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+; AVX-LABEL: testv16i8u:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrb $1, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpextrb $0, %xmm0, %ecx
+; AVX-NEXT: bsfl %ecx, %ecx
+; AVX-NEXT: vmovd %ecx, %xmm1
+; AVX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $2, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $3, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $4, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $5, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $6, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $7, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $8, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $9, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $10, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $11, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $12, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $13, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $14, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrb $15, %xmm0, %eax
+; AVX-NEXT: bsfl %eax, %eax
+; AVX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %out = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %in, i1 -1)
+ ret <16 x i8> %out
+define <2 x i64> @foldv2i64() {
+; SSE-LABEL: foldv2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movl $8, %eax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: retq
+; AVX-LABEL: foldv2i64:
+; AVX: # BB#0:
+; AVX-NEXT: movl $8, %eax
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: retq
+ %out = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> <i64 256, i64 -1>, i1 0)
+ ret <2 x i64> %out
+define <2 x i64> @foldv2i64u() {
+; SSE-LABEL: foldv2i64u:
+; SSE: # BB#0:
+; SSE-NEXT: movl $8, %eax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: retq
+; AVX-LABEL: foldv2i64u:
+; AVX: # BB#0:
+; AVX-NEXT: movl $8, %eax
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: retq
+ %out = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> <i64 256, i64 -1>, i1 -1)
+ ret <2 x i64> %out
+define <4 x i32> @foldv4i32() {
+; SSE-LABEL: foldv4i32:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,32,0]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
+; AVX-NEXT: retq
+ %out = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> <i32 256, i32 -1, i32 0, i32 255>, i1 0)
+ ret <4 x i32> %out
+define <4 x i32> @foldv4i32u() {
+; SSE-LABEL: foldv4i32u:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,32,0]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv4i32u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
+; AVX-NEXT: retq
+ %out = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> <i32 256, i32 -1, i32 0, i32 255>, i1 -1)
+ ret <4 x i32> %out
+define <8 x i16> @foldv8i16() {
+; SSE-LABEL: foldv8i16:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
+; AVX-NEXT: retq
+ %out = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88>, i1 0)
+ ret <8 x i16> %out
+define <8 x i16> @foldv8i16u() {
+; SSE-LABEL: foldv8i16u:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv8i16u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
+; AVX-NEXT: retq
+ %out = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88>, i1 -1)
+ ret <8 x i16> %out
+define <16 x i8> @foldv16i8() {
+; SSE-LABEL: foldv16i8:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
+; AVX-NEXT: retq
+ %out = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32>, i1 0)
+ ret <16 x i8> %out
+define <16 x i8> @foldv16i8u() {
+; SSE-LABEL: foldv16i8u:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
+; SSE-NEXT: retq
+; AVX-LABEL: foldv16i8u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
+; AVX-NEXT: retq
+ %out = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32>, i1 -1)
+ ret <16 x i8> %out
+declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
+declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
+declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
+declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
diff --git a/test/CodeGen/X86/vector-tzcnt-256.ll b/test/CodeGen/X86/vector-tzcnt-256.ll
new file mode 100644
index 0000000..8f744f7
--- /dev/null
+++ b/test/CodeGen/X86/vector-tzcnt-256.ll
@@ -0,0 +1,1195 @@
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+target triple = "x86_64-unknown-unknown"
+define <4 x i64> @testv4i64(<4 x i64> %in) {
+; AVX1-LABEL: testv4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrq $1, %xmm1, %rax
+; AVX1-NEXT: bsfq %rax, %rax
+; AVX1-NEXT: movl $64, %ecx
+; AVX1-NEXT: cmoveq %rcx, %rax
+; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovq %xmm1, %rax
+; AVX1-NEXT: bsfq %rax, %rax
+; AVX1-NEXT: cmoveq %rcx, %rax
+; AVX1-NEXT: vmovq %rax, %xmm1
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: bsfq %rax, %rax
+; AVX1-NEXT: cmoveq %rcx, %rax
+; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: bsfq %rax, %rax
+; AVX1-NEXT: cmoveq %rcx, %rax
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrq $1, %xmm1, %rax
+; AVX2-NEXT: bsfq %rax, %rax
+; AVX2-NEXT: movl $64, %ecx
+; AVX2-NEXT: cmoveq %rcx, %rax
+; AVX2-NEXT: vmovq %rax, %xmm2
+; AVX2-NEXT: vmovq %xmm1, %rax
+; AVX2-NEXT: bsfq %rax, %rax
+; AVX2-NEXT: cmoveq %rcx, %rax
+; AVX2-NEXT: vmovq %rax, %xmm1
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT: vpextrq $1, %xmm0, %rax
+; AVX2-NEXT: bsfq %rax, %rax
+; AVX2-NEXT: cmoveq %rcx, %rax
+; AVX2-NEXT: vmovq %rax, %xmm2
+; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: bsfq %rax, %rax
+; AVX2-NEXT: cmoveq %rcx, %rax
+; AVX2-NEXT: vmovq %rax, %xmm0
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %in, i1 0)
+ ret <4 x i64> %out
+define <4 x i64> @testv4i64u(<4 x i64> %in) {
+; AVX1-LABEL: testv4i64u:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrq $1, %xmm1, %rax
+; AVX1-NEXT: bsfq %rax, %rax
+; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovq %xmm1, %rax
+; AVX1-NEXT: bsfq %rax, %rax
+; AVX1-NEXT: vmovq %rax, %xmm1
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: bsfq %rax, %rax
+; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: bsfq %rax, %rax
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv4i64u:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrq $1, %xmm1, %rax
+; AVX2-NEXT: bsfq %rax, %rax
+; AVX2-NEXT: vmovq %rax, %xmm2
+; AVX2-NEXT: vmovq %xmm1, %rax
+; AVX2-NEXT: bsfq %rax, %rax
+; AVX2-NEXT: vmovq %rax, %xmm1
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT: vpextrq $1, %xmm0, %rax
+; AVX2-NEXT: bsfq %rax, %rax
+; AVX2-NEXT: vmovq %rax, %xmm2
+; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: bsfq %rax, %rax
+; AVX2-NEXT: vmovq %rax, %xmm0
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %in, i1 -1)
+ ret <4 x i64> %out
+define <8 x i32> @testv8i32(<8 x i32> %in) {
+; AVX1-LABEL: testv8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrd $1, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %ecx
+; AVX1-NEXT: movl $32, %eax
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: vmovd %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: vmovd %edx, %xmm2
+; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $2, %xmm1, %ecx
+; AVX1-NEXT: bsfl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $3, %xmm1, %ecx
+; AVX1-NEXT: bsfl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm1
+; AVX1-NEXT: vpextrd $1, %xmm0, %ecx
+; AVX1-NEXT: bsfl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: vmovd %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: vmovd %edx, %xmm2
+; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $2, %xmm0, %ecx
+; AVX1-NEXT: bsfl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $3, %xmm0, %ecx
+; AVX1-NEXT: bsfl %ecx, %ecx
+; AVX1-NEXT: cmovel %eax, %ecx
+; AVX1-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrd $1, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %ecx
+; AVX2-NEXT: movl $32, %eax
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: vmovd %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: vmovd %edx, %xmm2
+; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $2, %xmm1, %ecx
+; AVX2-NEXT: bsfl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $3, %xmm1, %ecx
+; AVX2-NEXT: bsfl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm1
+; AVX2-NEXT: vpextrd $1, %xmm0, %ecx
+; AVX2-NEXT: bsfl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: vmovd %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: vmovd %edx, %xmm2
+; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $2, %xmm0, %ecx
+; AVX2-NEXT: bsfl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $3, %xmm0, %ecx
+; AVX2-NEXT: bsfl %ecx, %ecx
+; AVX2-NEXT: cmovel %eax, %ecx
+; AVX2-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %in, i1 0)
+ ret <8 x i32> %out
+define <8 x i32> @testv8i32u(<8 x i32> %in) {
+; AVX1-LABEL: testv8i32u:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrd $1, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vmovd %xmm1, %ecx
+; AVX1-NEXT: bsfl %ecx, %ecx
+; AVX1-NEXT: vmovd %ecx, %xmm2
+; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $2, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $3, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
+; AVX1-NEXT: vpextrd $1, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vmovd %xmm0, %ecx
+; AVX1-NEXT: bsfl %ecx, %ecx
+; AVX1-NEXT: vmovd %ecx, %xmm2
+; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $2, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $3, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv8i32u:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrd $1, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vmovd %xmm1, %ecx
+; AVX2-NEXT: bsfl %ecx, %ecx
+; AVX2-NEXT: vmovd %ecx, %xmm2
+; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $2, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $3, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
+; AVX2-NEXT: vpextrd $1, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vmovd %xmm0, %ecx
+; AVX2-NEXT: bsfl %ecx, %ecx
+; AVX2-NEXT: vmovd %ecx, %xmm2
+; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $2, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $3, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %in, i1 -1)
+ ret <8 x i32> %out
+define <16 x i16> @testv16i16(<16 x i16> %in) {
+; AVX1-LABEL: testv16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrw $1, %xmm1, %eax
+; AVX1-NEXT: bsfw %ax, %cx
+; AVX1-NEXT: movw $16, %ax
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: vmovd %xmm1, %edx
+; AVX1-NEXT: bsfw %dx, %dx
+; AVX1-NEXT: cmovew %ax, %dx
+; AVX1-NEXT: vmovd %edx, %xmm2
+; AVX1-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $2, %xmm1, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $3, %xmm1, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $4, %xmm1, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $5, %xmm1, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $6, %xmm1, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $7, %xmm1, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm1
+; AVX1-NEXT: vpextrw $1, %xmm0, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: vmovd %xmm0, %edx
+; AVX1-NEXT: bsfw %dx, %dx
+; AVX1-NEXT: cmovew %ax, %dx
+; AVX1-NEXT: vmovd %edx, %xmm2
+; AVX1-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $2, %xmm0, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $3, %xmm0, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $4, %xmm0, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $5, %xmm0, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $6, %xmm0, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $7, %xmm0, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: cmovew %ax, %cx
+; AVX1-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrw $1, %xmm1, %eax
+; AVX2-NEXT: bsfw %ax, %cx
+; AVX2-NEXT: movw $16, %ax
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: vmovd %xmm1, %edx
+; AVX2-NEXT: bsfw %dx, %dx
+; AVX2-NEXT: cmovew %ax, %dx
+; AVX2-NEXT: vmovd %edx, %xmm2
+; AVX2-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $2, %xmm1, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $3, %xmm1, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $4, %xmm1, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $5, %xmm1, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $6, %xmm1, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $7, %xmm1, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm1
+; AVX2-NEXT: vpextrw $1, %xmm0, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: vmovd %xmm0, %edx
+; AVX2-NEXT: bsfw %dx, %dx
+; AVX2-NEXT: cmovew %ax, %dx
+; AVX2-NEXT: vmovd %edx, %xmm2
+; AVX2-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $2, %xmm0, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $3, %xmm0, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $4, %xmm0, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $5, %xmm0, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $6, %xmm0, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $7, %xmm0, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: cmovew %ax, %cx
+; AVX2-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %in, i1 0)
+ ret <16 x i16> %out
+define <16 x i16> @testv16i16u(<16 x i16> %in) {
+; AVX1-LABEL: testv16i16u:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrw $1, %xmm1, %eax
+; AVX1-NEXT: bsfw %ax, %ax
+; AVX1-NEXT: vmovd %xmm1, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: vmovd %ecx, %xmm2
+; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $2, %xmm1, %eax
+; AVX1-NEXT: bsfw %ax, %ax
+; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $3, %xmm1, %eax
+; AVX1-NEXT: bsfw %ax, %ax
+; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $4, %xmm1, %eax
+; AVX1-NEXT: bsfw %ax, %ax
+; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $5, %xmm1, %eax
+; AVX1-NEXT: bsfw %ax, %ax
+; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $6, %xmm1, %eax
+; AVX1-NEXT: bsfw %ax, %ax
+; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $7, %xmm1, %eax
+; AVX1-NEXT: bsfw %ax, %ax
+; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
+; AVX1-NEXT: vpextrw $1, %xmm0, %eax
+; AVX1-NEXT: bsfw %ax, %ax
+; AVX1-NEXT: vmovd %xmm0, %ecx
+; AVX1-NEXT: bsfw %cx, %cx
+; AVX1-NEXT: vmovd %ecx, %xmm2
+; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $2, %xmm0, %eax
+; AVX1-NEXT: bsfw %ax, %ax
+; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $3, %xmm0, %eax
+; AVX1-NEXT: bsfw %ax, %ax
+; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $4, %xmm0, %eax
+; AVX1-NEXT: bsfw %ax, %ax
+; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $5, %xmm0, %eax
+; AVX1-NEXT: bsfw %ax, %ax
+; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $6, %xmm0, %eax
+; AVX1-NEXT: bsfw %ax, %ax
+; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrw $7, %xmm0, %eax
+; AVX1-NEXT: bsfw %ax, %ax
+; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv16i16u:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrw $1, %xmm1, %eax
+; AVX2-NEXT: bsfw %ax, %ax
+; AVX2-NEXT: vmovd %xmm1, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: vmovd %ecx, %xmm2
+; AVX2-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $2, %xmm1, %eax
+; AVX2-NEXT: bsfw %ax, %ax
+; AVX2-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $3, %xmm1, %eax
+; AVX2-NEXT: bsfw %ax, %ax
+; AVX2-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $4, %xmm1, %eax
+; AVX2-NEXT: bsfw %ax, %ax
+; AVX2-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $5, %xmm1, %eax
+; AVX2-NEXT: bsfw %ax, %ax
+; AVX2-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $6, %xmm1, %eax
+; AVX2-NEXT: bsfw %ax, %ax
+; AVX2-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $7, %xmm1, %eax
+; AVX2-NEXT: bsfw %ax, %ax
+; AVX2-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
+; AVX2-NEXT: vpextrw $1, %xmm0, %eax
+; AVX2-NEXT: bsfw %ax, %ax
+; AVX2-NEXT: vmovd %xmm0, %ecx
+; AVX2-NEXT: bsfw %cx, %cx
+; AVX2-NEXT: vmovd %ecx, %xmm2
+; AVX2-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $2, %xmm0, %eax
+; AVX2-NEXT: bsfw %ax, %ax
+; AVX2-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $3, %xmm0, %eax
+; AVX2-NEXT: bsfw %ax, %ax
+; AVX2-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $4, %xmm0, %eax
+; AVX2-NEXT: bsfw %ax, %ax
+; AVX2-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $5, %xmm0, %eax
+; AVX2-NEXT: bsfw %ax, %ax
+; AVX2-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $6, %xmm0, %eax
+; AVX2-NEXT: bsfw %ax, %ax
+; AVX2-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrw $7, %xmm0, %eax
+; AVX2-NEXT: bsfw %ax, %ax
+; AVX2-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %in, i1 -1)
+ ret <16 x i16> %out
+define <32 x i8> @testv32i8(<32 x i8> %in) {
+; AVX1-LABEL: testv32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrb $1, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %edx
+; AVX1-NEXT: movl $32, %eax
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: movl $8, %ecx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpextrb $0, %xmm1, %esi
+; AVX1-NEXT: bsfl %esi, %esi
+; AVX1-NEXT: cmovel %eax, %esi
+; AVX1-NEXT: cmpl $32, %esi
+; AVX1-NEXT: cmovel %ecx, %esi
+; AVX1-NEXT: vmovd %esi, %xmm2
+; AVX1-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $2, %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $3, %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $4, %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $5, %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $6, %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $7, %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $8, %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $8, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $9, %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $9, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $10, %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $10, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $11, %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $11, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $12, %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $13, %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $13, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $14, %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $15, %xmm1, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $15, %edx, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $1, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpextrb $0, %xmm0, %esi
+; AVX1-NEXT: bsfl %esi, %esi
+; AVX1-NEXT: cmovel %eax, %esi
+; AVX1-NEXT: cmpl $32, %esi
+; AVX1-NEXT: cmovel %ecx, %esi
+; AVX1-NEXT: vmovd %esi, %xmm2
+; AVX1-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $2, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $3, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $4, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $5, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $6, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $7, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $8, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $8, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $9, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $9, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $10, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $10, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $11, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $11, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $12, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $13, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $13, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $14, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $15, %xmm0, %edx
+; AVX1-NEXT: bsfl %edx, %edx
+; AVX1-NEXT: cmovel %eax, %edx
+; AVX1-NEXT: cmpl $32, %edx
+; AVX1-NEXT: cmovel %ecx, %edx
+; AVX1-NEXT: vpinsrb $15, %edx, %xmm2, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrb $1, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %edx
+; AVX2-NEXT: movl $32, %eax
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: movl $8, %ecx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpextrb $0, %xmm1, %esi
+; AVX2-NEXT: bsfl %esi, %esi
+; AVX2-NEXT: cmovel %eax, %esi
+; AVX2-NEXT: cmpl $32, %esi
+; AVX2-NEXT: cmovel %ecx, %esi
+; AVX2-NEXT: vmovd %esi, %xmm2
+; AVX2-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $2, %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $3, %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $4, %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $5, %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $6, %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $7, %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $8, %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $8, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $9, %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $9, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $10, %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $10, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $11, %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $11, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $12, %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $13, %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $13, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $14, %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $15, %xmm1, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $15, %edx, %xmm2, %xmm1
+; AVX2-NEXT: vpextrb $1, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpextrb $0, %xmm0, %esi
+; AVX2-NEXT: bsfl %esi, %esi
+; AVX2-NEXT: cmovel %eax, %esi
+; AVX2-NEXT: cmpl $32, %esi
+; AVX2-NEXT: cmovel %ecx, %esi
+; AVX2-NEXT: vmovd %esi, %xmm2
+; AVX2-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $2, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $3, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $4, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $5, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $6, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $7, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $8, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $8, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $9, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $9, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $10, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $10, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $11, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $11, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $12, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $13, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $13, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $14, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $15, %xmm0, %edx
+; AVX2-NEXT: bsfl %edx, %edx
+; AVX2-NEXT: cmovel %eax, %edx
+; AVX2-NEXT: cmpl $32, %edx
+; AVX2-NEXT: cmovel %ecx, %edx
+; AVX2-NEXT: vpinsrb $15, %edx, %xmm2, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %in, i1 0)
+ ret <32 x i8> %out
+define <32 x i8> @testv32i8u(<32 x i8> %in) {
+; AVX1-LABEL: testv32i8u:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpextrb $1, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
+; AVX1-NEXT: bsfl %ecx, %ecx
+; AVX1-NEXT: vmovd %ecx, %xmm2
+; AVX1-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $2, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $3, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $4, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $5, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $6, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $7, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $8, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $9, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $10, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $11, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $12, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $13, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $14, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $15, %xmm1, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $1, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
+; AVX1-NEXT: bsfl %ecx, %ecx
+; AVX1-NEXT: vmovd %ecx, %xmm2
+; AVX1-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $2, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $3, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $4, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $5, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $6, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $7, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $8, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $9, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $10, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $11, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $12, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $13, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $14, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $15, %xmm0, %eax
+; AVX1-NEXT: bsfl %eax, %eax
+; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+; AVX2-LABEL: testv32i8u:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrb $1, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
+; AVX2-NEXT: bsfl %ecx, %ecx
+; AVX2-NEXT: vmovd %ecx, %xmm2
+; AVX2-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $2, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $3, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $4, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $5, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $6, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $7, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $8, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $9, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $10, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $11, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $12, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $13, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $14, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $15, %xmm1, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
+; AVX2-NEXT: vpextrb $1, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
+; AVX2-NEXT: bsfl %ecx, %ecx
+; AVX2-NEXT: vmovd %ecx, %xmm2
+; AVX2-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $2, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $3, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $4, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $5, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $6, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $7, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $8, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $9, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $10, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $11, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $12, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $13, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $14, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $15, %xmm0, %eax
+; AVX2-NEXT: bsfl %eax, %eax
+; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %in, i1 -1)
+ ret <32 x i8> %out
+define <4 x i64> @foldv4i64() {
+; AVX-LABEL: foldv4i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
+; AVX-NEXT: retq
+ %out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
+ ret <4 x i64> %out
+define <4 x i64> @foldv4i64u() {
+; AVX-LABEL: foldv4i64u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
+; AVX-NEXT: retq
+ %out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
+ ret <4 x i64> %out
+define <8 x i32> @foldv8i32() {
+; AVX-LABEL: foldv8i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
+; AVX-NEXT: retq
+ %out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
+ ret <8 x i32> %out
+define <8 x i32> @foldv8i32u() {
+; AVX-LABEL: foldv8i32u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
+; AVX-NEXT: retq
+ %out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
+ ret <8 x i32> %out
+define <16 x i16> @foldv16i16() {
+; AVX-LABEL: foldv16i16:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
+; AVX-NEXT: retq
+ %out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
+ ret <16 x i16> %out
+define <16 x i16> @foldv16i16u() {
+; AVX-LABEL: foldv16i16u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
+; AVX-NEXT: retq
+ %out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
+ ret <16 x i16> %out
+define <32 x i8> @foldv32i8() {
+; AVX-LABEL: foldv32i8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
+; AVX-NEXT: retq
+ %out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
+ ret <32 x i8> %out
+define <32 x i8> @foldv32i8u() {
+; AVX-LABEL: foldv32i8u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
+; AVX-NEXT: retq
+ %out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
+ ret <32 x i8> %out
+declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
+declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
+declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
+declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)
diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll
index 4278183..c64e174 100644
--- a/test/CodeGen/X86/vector-zext.ll
+++ b/test/CodeGen/X86/vector-zext.ll
@@ -9,7 +9,6 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: # kill
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pand .LCPI0_0(%rip), %xmm1
@@ -19,7 +18,6 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
-; SSSE3-NEXT: # kill
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSSE3-NEXT: pand .LCPI0_0(%rip), %xmm1
@@ -156,7 +154,6 @@ define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: # kill
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; SSE2-NEXT: pand .LCPI3_0(%rip), %xmm1
@@ -166,7 +163,6 @@ define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
-; SSSE3-NEXT: # kill
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; SSSE3-NEXT: pand .LCPI3_0(%rip), %xmm1
@@ -334,7 +330,6 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: # kill
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
@@ -343,7 +338,6 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
-; SSSE3-NEXT: # kill
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
@@ -366,7 +360,6 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: # kill
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
@@ -380,7 +373,6 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: # kill
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
@@ -389,7 +381,6 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
-; SSSE3-NEXT: # kill
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
@@ -413,7 +404,6 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone
; AVX2-LABEL: shuf_zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: # kill
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/win32-eh-states.ll b/test/CodeGen/X86/win32-eh-states.ll
new file mode 100644
index 0000000..8db127d
--- /dev/null
+++ b/test/CodeGen/X86/win32-eh-states.ll
@@ -0,0 +1,112 @@
+; RUN: llc -mtriple=i686-pc-windows-msvc < %s | FileCheck %s
+; Based on this source:
+; extern "C" void may_throw(int);
+; void f() {
+; try {
+; may_throw(1);
+; try {
+; may_throw(2);
+; } catch (int) {
+; may_throw(3);
+; }
+; } catch (int) {
+; may_throw(4);
+; }
+; }
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchHandlerType = type { i32, i8* }
+declare void @may_throw(i32)
+declare i32 @__CxxFrameHandler3(...)
+declare void*, i8*)
+declare void
+declare i32*)
+$"\01??_R0H@8" = comdat any
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
+define void @f() #0 {
+ invoke void @may_throw(i32 1)
+ to label %invoke.cont unwind label %lpad
+invoke.cont: ; preds = %entry
+ invoke void @may_throw(i32 2)
+ to label %try.cont.9 unwind label %lpad.1
+try.cont.9: ; preds = %invoke.cont.3, %invoke.cont, %catch.7
+ ; FIXME: Something about our CFG breaks TailDuplication. This empty asm blocks
+ ; it so we can focus on testing the state numbering.
+ call void asm sideeffect "", "~{dirflag},~{fpsr},~{flags}"()
+ ret void
+lpad: ; preds = %catch, %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType*
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = extractvalue { i8*, i32 } %0, 1
+ br label %catch.dispatch.4
+lpad.1: ; preds = %invoke.cont
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%eh.CatchHandlerType* to i8*)
+ %4 = extractvalue { i8*, i32 } %3, 0
+ %5 = extractvalue { i8*, i32 } %3, 1
+ %6 = tail call i32* bitcast (%eh.CatchHandlerType* to i8*)) #3
+ %matches = icmp eq i32 %5, %6
+ br i1 %matches, label %catch, label %catch.dispatch.4
+catch.dispatch.4: ; preds = %lpad.1, %lpad
+ %exn.slot.0 = phi i8* [ %4, %lpad.1 ], [ %1, %lpad ]
+ %ehselector.slot.0 = phi i32 [ %5, %lpad.1 ], [ %2, %lpad ]
+ %.pre = tail call i32* bitcast (%eh.CatchHandlerType* to i8*)) #3
+ %matches6 = icmp eq i32 %ehselector.slot.0, %.pre
+ br i1 %matches6, label %catch.7, label %eh.resume
+catch.7: ; preds = %catch.dispatch.4
+ tail call void* %exn.slot.0, i8* null) #3
+ tail call void @may_throw(i32 4)
+ tail call void #3
+ br label %try.cont.9
+catch: ; preds = %lpad.1
+ tail call void* %4, i8* null) #3
+ invoke void @may_throw(i32 3)
+ to label %invoke.cont.3 unwind label %lpad
+invoke.cont.3: ; preds = %catch
+ tail call void #3
+ br label %try.cont.9
+eh.resume: ; preds = %catch.dispatch.4
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0
+ %lpad.val.12 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1
+ resume { i8*, i32 } %lpad.val.12
+; CHECK: movl $-1, [[state:[-0-9]+]](%ebp)
+; CHECK: movl $___ehhandler$f, {{.*}}
+; CHECK: movl $0, [[state]](%ebp)
+; CHECK: movl $1, (%esp)
+; CHECK: calll _may_throw
+; CHECK: movl $1, [[state]](%ebp)
+; CHECK: movl $2, (%esp)
+; CHECK: calll _may_throw
+; CHECK-LABEL: _f.catch:
+; CHECK: movl $4, Lf$frame_escape_{{[0-9]+.*}}
+; CHECK: movl $4, (%esp)
+; CHECK: calll _may_throw
+; CHECK-LABEL: _f.catch.1:
+; CHECK: movl $3, Lf$frame_escape_{{[0-9]+.*}}
+; CHECK: movl $3, (%esp)
+; CHECK: calll _may_throw
diff --git a/test/CodeGen/X86/win32-eh.ll b/test/CodeGen/X86/win32-eh.ll
index 4d3c34e..42c9d9e 100644
--- a/test/CodeGen/X86/win32-eh.ll
+++ b/test/CodeGen/X86/win32-eh.ll
@@ -19,16 +19,18 @@ catchall:
; CHECK-LABEL: _use_except_handler3:
+; CHECK: pushl %ebp
+; CHECK: movl %esp, %ebp
; CHECK: subl ${{[0-9]+}}, %esp
-; CHECK: movl $-1, 12(%esp)
-; CHECK: movl $L__ehtable$use_except_handler3, 8(%esp)
-; CHECK: movl $__except_handler3, 4(%esp)
+; CHECK: movl $-1, -4(%ebp)
+; CHECK: movl $L__ehtable$use_except_handler3, -8(%ebp)
+; CHECK: leal -16(%ebp), %[[node:[^ ,]*]]
+; CHECK: movl $__except_handler3, -12(%ebp)
; CHECK: movl %fs:0, %[[next:[^ ,]*]]
-; CHECK: movl %[[next]], (%esp)
-; CHECK: leal (%esp), %[[node:[^ ,]*]]
+; CHECK: movl %[[next]], -16(%ebp)
; CHECK: movl %[[node]], %fs:0
; CHECK: calll _may_throw_or_crash
-; CHECK: movl (%esp), %[[next:[^ ,]*]]
+; CHECK: movl -16(%ebp), %[[next:[^ ,]*]]
; CHECK: movl %[[next]], %fs:0
; CHECK: retl
@@ -44,17 +46,21 @@ catchall:
; CHECK-LABEL: _use_except_handler4:
+; CHECK: pushl %ebp
+; CHECK: movl %esp, %ebp
; CHECK: subl ${{[0-9]+}}, %esp
-; CHECK: movl %esp, (%esp)
-; CHECK: movl $-2, 20(%esp)
-; CHECK: movl $L__ehtable$use_except_handler4, 4(%esp)
-; CHECK: leal 8(%esp), %[[node:[^ ,]*]]
-; CHECK: movl $__except_handler4, 12(%esp)
+; CHECK: movl %esp, -24(%ebp)
+; CHECK: movl $-2, -4(%ebp)
+; CHECK: movl $L__ehtable$use_except_handler4, %[[lsda:[^ ,]*]]
+; CHECK: xorl ___security_cookie, %[[lsda]]
+; CHECK: movl %[[lsda]], -8(%ebp)
+; CHECK: leal -16(%ebp), %[[node:[^ ,]*]]
+; CHECK: movl $__except_handler4, -12(%ebp)
; CHECK: movl %fs:0, %[[next:[^ ,]*]]
-; CHECK: movl %[[next]], 8(%esp)
+; CHECK: movl %[[next]], -16(%ebp)
; CHECK: movl %[[node]], %fs:0
; CHECK: calll _may_throw_or_crash
-; CHECK: movl 8(%esp), %[[next:[^ ,]*]]
+; CHECK: movl -16(%ebp), %[[next:[^ ,]*]]
; CHECK: movl %[[next]], %fs:0
; CHECK: retl
@@ -73,19 +79,34 @@ catchall:
; CHECK-LABEL: _use_CxxFrameHandler3:
+; CHECK: pushl %ebp
+; CHECK: movl %esp, %ebp
; CHECK: subl ${{[0-9]+}}, %esp
-; CHECK: movl %esp, (%esp)
-; CHECK: movl $-1, 12(%esp)
-; CHECK: leal 4(%esp), %[[node:[^ ,]*]]
-; CHECK: movl $___ehhandler$use_CxxFrameHandler3, 8(%esp)
+; CHECK: movl %esp, -16(%ebp)
+; CHECK: movl $-1, -4(%ebp)
+; CHECK: leal -12(%ebp), %[[node:[^ ,]*]]
+; CHECK: movl $___ehhandler$use_CxxFrameHandler3, -8(%ebp)
; CHECK: movl %fs:0, %[[next:[^ ,]*]]
-; CHECK: movl %[[next]], 4(%esp)
+; CHECK: movl %[[next]], -12(%ebp)
; CHECK: movl %[[node]], %fs:0
+; CHECK: movl $0, -4(%ebp)
; CHECK: calll _may_throw_or_crash
-; CHECK: movl 4(%esp), %[[next:[^ ,]*]]
+; CHECK: movl -12(%ebp), %[[next:[^ ,]*]]
; CHECK: movl %[[next]], %fs:0
; CHECK: retl
+; CHECK: .section .xdata,"dr"
+; CHECK-LABEL: L__ehtable$use_CxxFrameHandler3:
+; CHECK-NEXT: .long 429065506
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long ($stateUnwindMap$use_CxxFrameHandler3)
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long ($tryMap$use_CxxFrameHandler3)
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1
; CHECK-LABEL: ___ehhandler$use_CxxFrameHandler3:
; CHECK: movl $L__ehtable$use_CxxFrameHandler3, %eax
; CHECK: jmp ___CxxFrameHandler3 # TAILCALL
diff --git a/test/DebugInfo/Inputs/dwarfdump.elf-mips64-64-bit-dwarf b/test/DebugInfo/Inputs/dwarfdump.elf-mips64-64-bit-dwarf
new file mode 100755
index 0000000..5dbfea5
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump.elf-mips64-64-bit-dwarf
Binary files differ
diff --git a/test/DebugInfo/Inputs/invalid.elf b/test/DebugInfo/Inputs/invalid.elf
new file mode 100644
index 0000000..23acad8
--- /dev/null
+++ b/test/DebugInfo/Inputs/invalid.elf
Binary files differ
diff --git a/test/DebugInfo/Inputs/invalid.elf.2 b/test/DebugInfo/Inputs/invalid.elf.2
new file mode 100644
index 0000000..16528a5
--- /dev/null
+++ b/test/DebugInfo/Inputs/invalid.elf.2
@@ -0,0 +1 @@
+ELF'L:þB“ELFï \ No newline at end of file
diff --git a/test/DebugInfo/Inputs/invalid.elf.3 b/test/DebugInfo/Inputs/invalid.elf.3
new file mode 100644
index 0000000..f494460
--- /dev/null
+++ b/test/DebugInfo/Inputs/invalid.elf.3
@@ -0,0 +1 @@
+ELF‚Lþ'ELFB \ No newline at end of file
diff --git a/test/DebugInfo/Inputs/test-multiple-macho.o b/test/DebugInfo/Inputs/test-multiple-macho.o
new file mode 100644
index 0000000..428a1af
--- /dev/null
+++ b/test/DebugInfo/Inputs/test-multiple-macho.o
Binary files differ
diff --git a/test/DebugInfo/Inputs/test-simple-macho.o b/test/DebugInfo/Inputs/test-simple-macho.o
new file mode 100644
index 0000000..8ae4154
--- /dev/null
+++ b/test/DebugInfo/Inputs/test-simple-macho.o
Binary files differ
diff --git a/test/DebugInfo/X86/expressions.ll b/test/DebugInfo/X86/expressions.ll
new file mode 100644
index 0000000..52c1b08
--- /dev/null
+++ b/test/DebugInfo/X86/expressions.ll
@@ -0,0 +1,110 @@
+; REQUIRES: object-emission
+; RUN: llc -mtriple x86_64-apple-darwin14.0.0-elf -filetype=obj %s -o %t
+; RUN: llc -mtriple x86_64-apple-darwin14.0.0-elf -O0 -filetype=obj %s -o %t0
+; RUN: llvm-dwarfdump -debug-dump=loc %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=loc %t0 | FileCheck -check-prefix CHECK-O0 %s
+; CHECK: 0x00000000: Beginning address offset: 0x0000000000000000
+; CHECK: Ending address offset: 0x[[END:[0-9a-f]+]]
+; CHECK: Location description:
+; CHECK-NOT: 75 00 55
+; CHECK: 0x00000023: Beginning address offset: 0x0000000000000000
+; CHECK: Ending address offset: 0x{{.*}}[[END]]
+; CHECK: Location description: 75 08 9f
+; CHECK: 0x00000048: Beginning address offset: 0x0000000000000000
+; CHECK: Ending address offset: 0x{{.*}}[[END]]
+; CHECK: Location description: 75 10 9f
+; CHECK: 0x0000006d: Beginning address offset: 0x0000000000000000
+; CHECK: Ending address offset: 0x{{.*}}[[END]]
+; CHECK: Location description: 75 18
+; CHECK-O0: 0x00000000: Beginning address offset: 0x0000000000000000
+; CHECK-O0: Ending address offset: 0x000000000000001b
+; CHECK-O0: Location description: 55
+; CHECK-O0: Beginning address offset: 0x000000000000001b
+; CHECK-O0: Ending address offset: 0x0000000000000024
+; CHECK-O0: Location description: 54
+; CHECK-O0: Beginning address offset: 0x0000000000000024
+; CHECK-O0: Ending address offset: 0x0000000000000025
+; CHECK-O0: Location description: 77 78 23 00
+; CHECK-O0: 0x0000004c: Beginning address offset: 0x0000000000000000
+; CHECK-O0: Ending address offset: 0x000000000000001b
+; CHECK-O0: Location description: 75 08 9f
+; CHECK-O0: Beginning address offset: 0x000000000000001b
+; CHECK-O0: Ending address offset: 0x0000000000000024
+; CHECK-O0: Location description: 74 08 9f
+; CHECK-O0: Beginning address offset: 0x0000000000000024
+; CHECK-O0: Ending address offset: 0x0000000000000025
+; CHECK-O0: Location description: 77 78 23 08
+; CHECK-O0: 0x0000009c: Beginning address offset: 0x0000000000000000
+; CHECK-O0: Ending address offset: 0x000000000000001b
+; CHECK-O0: Location description: 75 10 9f
+; CHECK-O0: Beginning address offset: 0x000000000000001b
+; CHECK-O0: Ending address offset: 0x0000000000000024
+; CHECK-O0: Location description: 74 10 9f
+; CHECK-O0: Beginning address offset: 0x0000000000000024
+; CHECK-O0: Ending address offset: 0x0000000000000025
+; CHECK-O0: Location description: 77 78 23 08 23 08
+; CHECK-O0: 0x000000ee: Beginning address offset: 0x0000000000000000
+; CHECK-O0: Ending address offset: 0x000000000000001b
+; CHECK-O0: Location description: 75 18
+; CHECK-O0: Beginning address offset: 0x000000000000001b
+; CHECK-O0: Ending address offset: 0x0000000000000024
+; CHECK-O0: Location description: 74 18
+; CHECK-O0: Beginning address offset: 0x0000000000000024
+; CHECK-O0: Ending address offset: 0x0000000000000025
+; CHECK-O0: Location description: 77 78 23 10 23 08 06
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #0
+define float @foo(float* %args, float *%args2)
+ call void @llvm.dbg.value(metadata float* %args, i64 0, metadata !11, metadata !12), !dbg !19
+ call void @llvm.dbg.value(metadata float* %args, i64 0, metadata !13, metadata !14), !dbg !19
+ call void @llvm.dbg.value(metadata float* %args, i64 0, metadata !15, metadata !16), !dbg !19
+ call void @llvm.dbg.value(metadata float* %args, i64 0, metadata !17, metadata !18), !dbg !19
+ %a = load float, float* %args, !dbg !19
+ %bptr = getelementptr float, float* %args, i32 1, !dbg !19
+ %b = load float, float* %bptr, !dbg !19
+ %cptr = getelementptr float, float* %args, i32 2, !dbg !19
+ %c = load float, float* %cptr, !dbg !19
+ %dptr = getelementptr float, float* %args, i32 3, !dbg !19
+ %d = load float, float* %dptr, !dbg !19
+ %ret1 = fadd float %a, %b, !dbg !19
+ %ret2 = fadd float %c, %d, !dbg !19
+ call void @llvm.dbg.value(metadata float* %args2, i64 0, metadata !11, metadata !12), !dbg !19
+ call void @llvm.dbg.value(metadata float* %args2, i64 0, metadata !13, metadata !14), !dbg !19
+ call void @llvm.dbg.value(metadata float* %args2, i64 0, metadata !15, metadata !16), !dbg !19
+ call void @llvm.dbg.value(metadata float* %args2, i64 0, metadata !17, metadata !18), !dbg !19
+ %ret = fsub float %ret1, %ret2, !dbg !19
+ ret float %ret, !dbg !19
+attributes #0 = { nounwind readnone }
+!llvm.module.flags = !{!0, !1}
+!llvm.dbg.cu = !{!2}
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 1, !"Debug Info Version", i32 3}
+!2 = !DICompileUnit(language: DW_LANG_C89, file: !3, producer: "byHand", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !4, retainedTypes: !4, subprograms: !5, globals: !4, imports: !4)
+!3 = !DIFile(filename: "expressions", directory: ".")
+!4 = !{}
+!5 = !{!6}
+!6 = !DISubprogram(name: "foo", linkageName: "foo", scope: null, file: !3, type: !7, isLocal: false, isDefinition: true, isOptimized: true, function: float (float*, float*)* @foo, variables: !4)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!10, !10}
+!9 = !DIBasicType(name: "float", size: 4, align: 4, encoding: DW_ATE_float)
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, align: 64)
+!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", arg: 1, scope: !6, file: !3, line: 1, type: !10)
+!12 = !DIExpression(DW_OP_plus, 0)
+!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 2, scope: !6, file: !3, line: 1, type: !10)
+!14 = !DIExpression(DW_OP_plus, 8)
+!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", arg: 3, scope: !6, file: !3, line: 1, type: !10)
+!16 = !DIExpression(DW_OP_plus, 8, DW_OP_plus, 8)
+!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "d", arg: 4, scope: !6, file: !3, line: 1, type: !9)
+!18 = !DIExpression(DW_OP_plus, 16, DW_OP_plus, 8, DW_OP_deref)
+!19 = !DILocation(line: 1, scope: !6)
diff --git a/test/DebugInfo/X86/inlined-indirect-value.ll b/test/DebugInfo/X86/inlined-indirect-value.ll
new file mode 100644
index 0000000..7f95691
--- /dev/null
+++ b/test/DebugInfo/X86/inlined-indirect-value.ll
@@ -0,0 +1,81 @@
+; RUN: llc -filetype=asm -asm-verbose=0 < %s | FileCheck %s
+; "1" from line 09 in the snippet below shouldn't be marked with location of "1"
+; from line 04. Instead it will have location inside main() (real location is
+; just erased, so it won't be perfectly accurate).
+; options: -g -O3
+; 01 volatile int x;
+; 02 int y;
+; 03 static __attribute__((always_inline)) int f1() {
+; 04 if (x * 3 < 14) return 1;
+; 05 return 2;
+; 06 }
+; 07 int main() {
+; 08 x = f1();
+; 09 x = x ? 1 : 2;
+; 10 }
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+@x = common global i32 0, align 4
+@y = common global i32 0, align 4
+define i32 @main() {
+; CHECK: .loc 1 {{[89]}}
+; CHECK-NOT: .loc
+; CHECK: movl $1
+ %0 = load volatile i32, i32* @x, align 4, !dbg !16, !tbaa !19
+ %mul.i = mul nsw i32 %0, 3, !dbg !23
+ %cmp.i = icmp slt i32 %mul.i, 14, !dbg !24
+ %..i = select i1 %cmp.i, i32 1, i32 2, !dbg !25
+ store volatile i32 %..i, i32* @x, align 4, !dbg !27, !tbaa !19
+ %1 = load volatile i32, i32* @x, align 4, !dbg !28, !tbaa !19
+ %tobool = icmp ne i32 %1, 0, !dbg !28
+ br i1 %tobool, label %select.end, label %select.mid
+select.mid: ; preds = %entry
+ br label %select.end
+select.end: ; preds = %entry, %select.mid
+ %cond = phi i32 [ 1, %entry ], [ 2, %select.mid ]
+ store volatile i32 %cond, i32* @x, align 4, !dbg !29, !tbaa !19
+ ret i32 0, !dbg !30
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !9, imports: !2)
+!1 = !DIFile(filename: "inline-break.c", directory: "/build/dir")
+!2 = !{}
+!3 = !{!4, !8}
+!4 = !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, scopeLine: 7, isOptimized: true, function: i32 ()* @main, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !DISubprogram(name: "f1", scope: !1, file: !1, line: 3, type: !5, isLocal: true, isDefinition: true, scopeLine: 3, isOptimized: true, variables: !2)
+!9 = !{!10, !12}
+!10 = !DIGlobalVariable(name: "x", scope: !0, file: !1, line: 1, type: !11, isLocal: false, isDefinition: true, variable: i32* @x)
+!11 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !7)
+!12 = !DIGlobalVariable(name: "y", scope: !0, file: !1, line: 2, type: !7, isLocal: false, isDefinition: true, variable: i32* @y)
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!16 = !DILocation(line: 4, column: 9, scope: !17, inlinedAt: !18)
+!17 = distinct !DILexicalBlock(scope: !8, file: !1, line: 4, column: 9)
+!18 = distinct !DILocation(line: 8, column: 9, scope: !4)
+!19 = !{!20, !20, i64 0}
+!20 = !{!"int", !21, i64 0}
+!21 = !{!"omnipotent char", !22, i64 0}
+!22 = !{!"Simple C/C++ TBAA"}
+!23 = !DILocation(line: 4, column: 11, scope: !17, inlinedAt: !18)
+!24 = !DILocation(line: 4, column: 15, scope: !17, inlinedAt: !18)
+!25 = !DILocation(line: 4, column: 21, scope: !26, inlinedAt: !18)
+!26 = !DILexicalBlockFile(scope: !17, file: !1, discriminator: 1)
+!27 = !DILocation(line: 8, column: 7, scope: !4)
+!28 = !DILocation(line: 9, column: 9, scope: !4)
+!29 = !DILocation(line: 9, column: 7, scope: !4)
+!30 = !DILocation(line: 10, column: 1, scope: !4)
diff --git a/test/DebugInfo/debuglineinfo-macho.test b/test/DebugInfo/debuglineinfo-macho.test
new file mode 100644
index 0000000..0c63dd6
--- /dev/null
+++ b/test/DebugInfo/debuglineinfo-macho.test
@@ -0,0 +1,43 @@
+# Check that relocations get applied
+RUN: llvm-dwarfdump %p/Inputs/test-simple-macho.o | FileCheck %s
+RUN: llvm-dwarfdump %p/Inputs/test-multiple-macho.o | FileCheck %s
+RUN: llvm-rtdyld -printline %p/Inputs/test-multiple-macho.o | FileCheck %s
+RUN: llvm-rtdyld -printobjline %p/Inputs/test-multiple-macho.o | FileCheck %s
+CHECK-NOT: error: failed to compute relocation: X86_64_RELOC_UNSIGNED
+# Check that relocations get applied correctly
+RUN: llvm-rtdyld -printobjline %p/Inputs/test-simple-macho.o \
+RUN: | FileCheck %s -check-prefix TEST_SIMPLE
+RUN: llvm-rtdyld -printline %p/Inputs/test-simple-macho.o \
+RUN: | FileCheck %s -check-prefix TEST_SIMPLE
+RUN: llvm-rtdyld -printobjline %p/Inputs/test-multiple-macho.o \
+RUN: | FileCheck %s -check-prefix TEST_MULTIPLE
+RUN: llvm-rtdyld -printline %p/Inputs/test-multiple-macho.o \
+RUN: | FileCheck %s -check-prefix TEST_MULTIPLE
+TEST_SIMPLE: Function: _foo, Size = 11
+TEST_SIMPLE-NEXT: Line info @ 0: simple.c, line:1
+TEST_SIMPLE-NEXT: Line info @ 7: simple.c, line:2
+TEST_SIMPLE-NOT: Line info @ 11: simple.c, line:2
+TEST_MULTIPLE: Function: _bar, Size = 48
+TEST_MULTIPLE-NEXT: Line info @ 0: multiple.c, line:5
+TEST_MULTIPLE-NEXT: Line info @ 7: multiple.c, line:6
+TEST_MULTIPLE-NEXT: Line info @ 16: multiple.c, line:9
+TEST_MULTIPLE-NEXT: Line info @ 21: multiple.c, line:9
+TEST_MULTIPLE-NEXT: Line info @ 26: multiple.c, line:7
+TEST_MULTIPLE-NEXT: Line info @ 33: multiple.c, line:10
+TEST_MULTIPLE-NOT: Line info @ 48: multiple.c, line:12
+TEST_MULTIPLE-NEXT: Function: _foo, Size = 16
+TEST_MULTIPLE-NEXT: Line info @ 0: multiple.c, line:1
+TEST_MULTIPLE-NEXT: Line info @ 7: multiple.c, line:2
+TEST_MULTIPLE-NOT: Line info @ 16: multiple.c, line:5
+TEST_MULTIPLE-NEXT: Function: _fubar, Size = 46
+TEST_MULTIPLE-NEXT: Line info @ 0: multiple.c, line:12
+TEST_MULTIPLE-NEXT: Line info @ 7: multiple.c, line:13
+TEST_MULTIPLE-NEXT: Line info @ 12: multiple.c, line:17
+TEST_MULTIPLE-NEXT: Line info @ 25: multiple.c, line:15
+TEST_MULTIPLE-NEXT: Line info @ 34: multiple.c, line:19
+TEST_MULTIPLE-NEXT: Line info @ 41: multiple.c, line:21
+TEST_MULTIPLE-NOT: Line info @ 46: multiple.c, line:21
diff --git a/test/DebugInfo/debuglineinfo.test b/test/DebugInfo/debuglineinfo.test
index 96a3228..720ba12 100644
--- a/test/DebugInfo/debuglineinfo.test
+++ b/test/DebugInfo/debuglineinfo.test
@@ -18,7 +18,6 @@ TEST_INLINE-NEXT: Line info @ 165: test-inline.cpp, line:35
TEST_INLINE-NEXT: Function: _Z3foov, Size = 3
TEST_INLINE-NEXT: Line info @ 0: test-inline.cpp, line:28
TEST_INLINE-NEXT: Line info @ 2: test-inline.cpp, line:29
-TEST_INLINE-NEXT: Line info @ 3: test-inline.cpp, line:29
TEST_INLINE-NEXT: Function: main, Size = 146
TEST_INLINE-NEXT: Line info @ 0: test-inline.cpp, line:39
TEST_INLINE-NEXT: Line info @ 21: test-inline.cpp, line:41
@@ -29,7 +28,6 @@ TEST_INLINE-NEXT: Line info @ 90: test-inline.cpp, line:45
TEST_INLINE-NEXT: Line info @ 95: test-inline.cpp, line:46
TEST_INLINE-NEXT: Line info @ 114: test-inline.cpp, line:48
TEST_INLINE-NEXT: Line info @ 141: test-inline.cpp, line:49
-TEST_INLINE-NEXT: Line info @ 146: test-inline.cpp, line:49
; This test checks the case where all code is in a single section.
TEST_PARAMETERS: Function: _Z15test_parametersPfPA2_dR11char_structPPitm, Size = 170
@@ -49,5 +47,4 @@ TEST_PARAMETERS-NEXT: Line info @ 90: test-parameters.cpp, line:45
TEST_PARAMETERS-NEXT: Line info @ 95: test-parameters.cpp, line:46
TEST_PARAMETERS-NEXT: Line info @ 114: test-parameters.cpp, line:48
TEST_PARAMETERS-NEXT: Line info @ 141: test-parameters.cpp, line:49
-TEST_PARAMETERS-NEXT: Line info @ 146: test-parameters.cpp, line:49
diff --git a/test/DebugInfo/dwarfdump-64-bit-dwarf.test b/test/DebugInfo/dwarfdump-64-bit-dwarf.test
new file mode 100644
index 0000000..0a24414
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-64-bit-dwarf.test
@@ -0,0 +1,15 @@
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump.elf-mips64-64-bit-dwarf \
+RUN: --debug-dump=line | FileCheck %s
+# FIXME: llvm-dwarfdump's support for 64-bit dwarf is currently limited to
+# .debug_line.
+CHECK: total_length: 0x00000212
+CHECK: version: 2
+CHECK:prologue_length: 0x000001ab
+CHECK:min_inst_length: 1
+CHECK:default_is_stmt: 1
+CHECK: line_base: -5
+CHECK: line_range: 14
+CHECK: opcode_base: 13
+CHECK: is_stmt end_sequence
diff --git a/test/DebugInfo/dwarfdump-invalid.test b/test/DebugInfo/dwarfdump-invalid.test
new file mode 100644
index 0000000..da5b23e
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-invalid.test
@@ -0,0 +1,6 @@
+; Verify that llvm-dwarfdump doesn't crash on broken input files.
+RUN: llvm-dwarfdump %p/Inputs/invalid.elf 2>&1 | FileCheck %s --check-prefix=INVALID-ELF
+RUN: llvm-dwarfdump %p/Inputs/invalid.elf.2 2>&1 | FileCheck %s --check-prefix=INVALID-ELF
+RUN: llvm-dwarfdump %p/Inputs/invalid.elf.3 2>&1 | FileCheck %s --check-prefix=INVALID-ELF
+INVALID-ELF: Invalid data was encountered while parsing the file
diff --git a/test/ExecutionEngine/MCJIT/cross-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/cross-module-sm-pic-a.ll
index eb41424..0433199 100644
--- a/test/ExecutionEngine/MCJIT/cross-module-sm-pic-a.ll
+++ b/test/ExecutionEngine/MCJIT/cross-module-sm-pic-a.ll
@@ -1,5 +1,5 @@
; RUN: %lli -extra-module=%p/Inputs/cross-module-b.ll -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, i686, i386
+; XFAIL: mips-, mipsel-, i686, i386
declare i32 @FB()
diff --git a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll
index 9277ec4..222196f 100644
--- a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll
+++ b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -relocation-model=pic -code-model=large %s
-; XFAIL: cygwin, win32, mingw, mips, i686, i386, aarch64, arm, asan, msan
+; XFAIL: cygwin, win32, mingw, mips-, mipsel-, i686, i386, aarch64, arm, asan, msan
declare i8* @__cxa_allocate_exception(i64)
declare void @__cxa_throw(i8*, i8*, i8*)
declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/MCJIT/eh-sm-pic.ll b/test/ExecutionEngine/MCJIT/eh-sm-pic.ll
index 37fb628..c73dcca 100644
--- a/test/ExecutionEngine/MCJIT/eh-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/eh-sm-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -relocation-model=pic -code-model=small %s
-; XFAIL: cygwin, win32, mingw, mips, i686, i386, darwin, aarch64, arm, asan, msan
+; XFAIL: cygwin, win32, mingw, mips-, mipsel-, i686, i386, darwin, aarch64, arm, asan, msan
declare i8* @__cxa_allocate_exception(i64)
declare void @__cxa_throw(i8*, i8*, i8*)
declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/MCJIT/multi-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/multi-module-sm-pic-a.ll
index 01faecc..7e5710d 100644
--- a/test/ExecutionEngine/MCJIT/multi-module-sm-pic-a.ll
+++ b/test/ExecutionEngine/MCJIT/multi-module-sm-pic-a.ll
@@ -1,5 +1,5 @@
; RUN: %lli -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, i686, i386
+; XFAIL: mips-, mipsel-, i686, i386
declare i32 @FB()
diff --git a/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll
index d47fc6c..415fd25 100644
--- a/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll
+++ b/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll
@@ -1,5 +1,5 @@
; RUN: %lli -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, i686, i386, arm
+; XFAIL: mips-, mipsel-, i686, i386, arm
declare i32 @FB()
diff --git a/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll
index d248c4b..3e020dc 100644
--- a/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll
+++ b/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll
@@ -1,5 +1,5 @@
; RUN: %lli -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, i686, i386, arm
+; XFAIL: mips-, mipsel-, i686, i386, arm
declare i32 @FB()
diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll
index f9184b4..e350b85 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -remote-mcjit -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, aarch64, arm, i686, i386
+; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386
@count = global i32 1, align 4
diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll
index d9a4faa..7162e92 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -remote-mcjit -O0 -relocation-model=pic -code-model=small %s
-; XFAIL: mips, aarch64, arm, i686, i386
+; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386
@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
@ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4
diff --git a/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll b/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll
index 6eebe44..46545ce 100644
--- a/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -disable-lazy-compilation=false -relocation-model=pic -code-model=small %s
-; XFAIL: mips, i686, i386, aarch64, arm
+; XFAIL: mips-, mipsel-, i686, i386, aarch64, arm
define i32 @main() nounwind {
diff --git a/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll
index 8ae496d..e54135f 100644
--- a/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, aarch64, arm, i686, i386
+; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386
@count = global i32 1, align 4
diff --git a/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll
index d50ba9d..eea6fde 100644
--- a/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -O0 -relocation-model=pic -code-model=small %s
-; XFAIL: mips, aarch64, arm, i686, i386
+; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386
@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
@ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4
diff --git a/test/ExecutionEngine/OrcMCJIT/cross-module-sm-pic-a.ll b/test/ExecutionEngine/OrcMCJIT/cross-module-sm-pic-a.ll
index 4f5afd0..9af82e7 100644
--- a/test/ExecutionEngine/OrcMCJIT/cross-module-sm-pic-a.ll
+++ b/test/ExecutionEngine/OrcMCJIT/cross-module-sm-pic-a.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/cross-module-b.ll -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, i686, i386
+; XFAIL: mips-, mipsel-, i686, i386
declare i32 @FB()
diff --git a/test/ExecutionEngine/OrcMCJIT/eh-lg-pic.ll b/test/ExecutionEngine/OrcMCJIT/eh-lg-pic.ll
index 32b53c4..47674dd 100644
--- a/test/ExecutionEngine/OrcMCJIT/eh-lg-pic.ll
+++ b/test/ExecutionEngine/OrcMCJIT/eh-lg-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -relocation-model=pic -code-model=large %s
-; XFAIL: cygwin, win32, mingw, mips, i686, i386, aarch64, arm, asan, msan
+; XFAIL: cygwin, win32, mingw, mips-, mipsel-, i686, i386, aarch64, arm, asan, msan
declare i8* @__cxa_allocate_exception(i64)
declare void @__cxa_throw(i8*, i8*, i8*)
declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/OrcMCJIT/eh-sm-pic.ll b/test/ExecutionEngine/OrcMCJIT/eh-sm-pic.ll
index a54795e..c279720 100644
--- a/test/ExecutionEngine/OrcMCJIT/eh-sm-pic.ll
+++ b/test/ExecutionEngine/OrcMCJIT/eh-sm-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -relocation-model=pic -code-model=small %s
-; XFAIL: cygwin, win32, mingw, mips, i686, i386, darwin, aarch64, arm, asan, msan
+; XFAIL: cygwin, win32, mingw, mips-, mipsel-, i686, i386, darwin, aarch64, arm, asan, msan
declare i8* @__cxa_allocate_exception(i64)
declare void @__cxa_throw(i8*, i8*, i8*)
declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/OrcMCJIT/multi-module-sm-pic-a.ll b/test/ExecutionEngine/OrcMCJIT/multi-module-sm-pic-a.ll
index 80b8e16..0052c01 100644
--- a/test/ExecutionEngine/OrcMCJIT/multi-module-sm-pic-a.ll
+++ b/test/ExecutionEngine/OrcMCJIT/multi-module-sm-pic-a.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, i686, i386
+; XFAIL: mips-, mipsel-, i686, i386
declare i32 @FB()
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/cross-module-sm-pic-a.ll b/test/ExecutionEngine/OrcMCJIT/remote/cross-module-sm-pic-a.ll
index c897904..4326fc1 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/cross-module-sm-pic-a.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/cross-module-sm-pic-a.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, i686, i386, arm
+; XFAIL: mips-, mipsel-, i686, i386, arm
declare i32 @FB()
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/multi-module-sm-pic-a.ll b/test/ExecutionEngine/OrcMCJIT/remote/multi-module-sm-pic-a.ll
index 41812d2..18a2d7e 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/multi-module-sm-pic-a.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/multi-module-sm-pic-a.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, i686, i386, arm
+; XFAIL: mips-, mipsel-, i686, i386, arm
declare i32 @FB()
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll
index ec9e1ae..f1e9313 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, aarch64, arm, i686, i386
+; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386
@count = global i32 1, align 4
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll
index 61321ac..cac800e 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -O0 -relocation-model=pic -code-model=small %s
-; XFAIL: mips, aarch64, arm, i686, i386
+; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386
@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
@ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4
diff --git a/test/ExecutionEngine/OrcMCJIT/stubs-sm-pic.ll b/test/ExecutionEngine/OrcMCJIT/stubs-sm-pic.ll
index f354a0d..ec4183b 100644
--- a/test/ExecutionEngine/OrcMCJIT/stubs-sm-pic.ll
+++ b/test/ExecutionEngine/OrcMCJIT/stubs-sm-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -disable-lazy-compilation=false -relocation-model=pic -code-model=small %s
-; XFAIL: mips, i686, i386, aarch64, arm
+; XFAIL: mips-, mipsel-, i686, i386, aarch64, arm
define i32 @main() nounwind {
diff --git a/test/ExecutionEngine/OrcMCJIT/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/OrcMCJIT/test-global-init-nonzero-sm-pic.ll
index 7f66aec..c9b6cf7 100644
--- a/test/ExecutionEngine/OrcMCJIT/test-global-init-nonzero-sm-pic.ll
+++ b/test/ExecutionEngine/OrcMCJIT/test-global-init-nonzero-sm-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, aarch64, arm, i686, i386
+; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386
@count = global i32 1, align 4
diff --git a/test/ExecutionEngine/OrcMCJIT/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/OrcMCJIT/test-ptr-reloc-sm-pic.ll
index 6e0a425..d0bb070 100644
--- a/test/ExecutionEngine/OrcMCJIT/test-ptr-reloc-sm-pic.ll
+++ b/test/ExecutionEngine/OrcMCJIT/test-ptr-reloc-sm-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -O0 -relocation-model=pic -code-model=small %s
-; XFAIL: mips, aarch64, arm, i686, i386
+; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386
@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
@ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4
diff --git a/test/ExecutionEngine/RuntimeDyld/Mips/ELF_Mips64r2N64_PIC_relocations.s b/test/ExecutionEngine/RuntimeDyld/Mips/ELF_Mips64r2N64_PIC_relocations.s
new file mode 100644
index 0000000..1d8d293
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/Mips/ELF_Mips64r2N64_PIC_relocations.s
@@ -0,0 +1,164 @@
+# RUN: llvm-mc -triple=mips64el-unknown-linux -relocation-model=pic -code-model=small -filetype=obj -o %T/test_ELF_Mips64N64.o %s
+# RUN: llc -mtriple=mips64el-unknown-linux -relocation-model=pic -filetype=obj -o %T/test_ELF_ExternalFunction_Mips64N64.o %S/Inputs/ExternalFunction.ll
+# RUN: llvm-rtdyld -triple=mips64el-unknown-linux -verify -map-section test_ELF_Mips64N64.o,.text=0x1000 -map-section test_ELF_ExternalFunction_Mips64N64.o,.text=0x10000 -check=%s %/T/test_ELF_Mips64N64.o %T/test_ELF_ExternalFunction_Mips64N64.o
+ .data
+# Test R_MIPS_PC32 relocation.
+# rtdyld-check: *{4}(R_MIPS_PC32) = (foo - R_MIPS_PC32)[31:0]
+ .word foo-.
+# rtdyld-check: *{4}(R_MIPS_PC32 + 4) = (foo - tmp1)[31:0]
+ .4byte foo-tmp1
+ .text
+ .abicalls
+ .section .mdebug.abi64,"",@progbits
+ .nan legacy
+ .file "ELF_Mips64N64_PIC_relocations.ll"
+ .text
+ .globl bar
+ .align 3
+ .type bar,@function
+ .set nomicromips
+ .set nomips16
+ .ent bar
+ .frame $fp,40,$ra
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+ .set noreorder
+ .set nomacro
+ .set noat
+ daddiu $sp, $sp, -40
+ sd $ra, 32($sp)
+ sd $fp, 24($sp)
+ move $fp, $sp
+ sd $4, 16($fp)
+ lb $2, 0($4)
+ sd $4, 8($fp)
+# Test R_MIPS_26 relocation.
+# rtdyld-check: decode_operand(insn1, 0)[25:0] = foo
+ jal foo
+ nop
+# Test R_MIPS_PC16 relocation.
+# rtdyld-check: decode_operand(insn2, 1)[15:0] = foo - insn2
+ bal foo
+ nop
+ move $sp, $fp
+ ld $ra, 32($sp)
+ ld $fp, 24($sp)
+ daddiu $sp, $sp, 32
+ jr $ra
+ nop
+ .set at
+ .set macro
+ .set reorder
+ .end bar
+ .size bar, ($func_end0)-bar
+ .globl main
+ .align 3
+ .type main,@function
+ .set nomicromips
+ .set nomips16
+ .ent main
+ .frame $fp,32,$ra
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+ .set noreorder
+ .set nomacro
+ .set noat
+ daddiu $sp, $sp, -32
+ sd $ra, 24($sp)
+ sd $fp, 16($sp)
+ sd $gp, 8($sp)
+ move $fp, $sp
+# Check upper 16-bits of offset between the address of main function
+# and the global offset table.
+# rtdyld-check: decode_operand(insn3, 1)[15:0] = ((section_addr(test_ELF_Mips64N64.o, .got) + 0x7ff0) - main + 0x8000)[31:16]
+ lui $1, %hi(%neg(%gp_rel(main)))
+ daddu $1, $1, $25
+# Check lower 16-bits of offset between the address of main function
+# and the global offset table.
+# rtdyld-check: decode_operand(insn4, 2)[15:0] = ((section_addr(test_ELF_Mips64N64.o, .got) + 0x7ff0) - main)[15:0]
+ daddiu $1, $1, %lo(%neg(%gp_rel(main)))
+ sw $zero, 4($fp)
+# $gp register contains address of the .got section + 0x7FF0. 0x7FF0 is
+# the offset of $gp from the beginning of the .got section. Check that we are
+# loading address of the page pointer from correct offset. In this case
+# the page pointer is the first entry in the .got section, so offset will be
+# 0 - 0x7FF0.
+# rtdyld-check: decode_operand(insn5, 2)[15:0] = 0x8010
+# Check that the global offset table contains the page pointer.
+# rtdyld-check: *{8}(section_addr(test_ELF_Mips64N64.o, .got)) = (_str + 0x8000) & 0xffffffffffff0000
+ ld $25, %got_page(_str)($1)
+# Check the offset of _str from the page pointer.
+# rtdyld-check: decode_operand(insn6, 2)[15:0] = _str[15:0]
+ daddiu $25, $25, %got_ofst(_str)
+# Check that we are loading address of var from correct offset. In this case
+# var is the second entry in the .got section, so offset will be 8 - 0x7FF0.
+# rtdyld-check: decode_operand(insn7, 2)[15:0] = 0x8018
+# Check that the global offset table contains the address of the var.
+# rtdyld-check: *{8}(section_addr(test_ELF_Mips64N64.o, .got) + 8) = var
+ ld $2, %got_disp(var)($1)
+ sd $25, 0($2)
+# Check that we are loading address of bar from correct offset. In this case
+# bar is the third entry in the .got section, so offset will be 16 - 0x7FF0.
+# rtdyld-check: decode_operand(insn8, 2)[15:0] = 0x8020
+# Check that the global offset table contains the address of the bar.
+# rtdyld-check: *{8}(section_addr(test_ELF_Mips64N64.o, .got) + 16) = bar
+ ld $2, %call16(bar)($1)
+ move $4, $25
+ move $gp, $1
+ move $25, $2
+ jalr $25
+ nop
+ move $sp, $fp
+ ld $gp, 8($sp)
+ ld $fp, 16($sp)
+ ld $ra, 24($sp)
+ daddiu $sp, $sp, 32
+ jr $ra
+ nop
+ .set at
+ .set macro
+ .set reorder
+ .end main
+ .size main, ($func_end1)-main
+ .type _str,@object
+ .section .rodata.str1.1,"aMS",@progbits,1
+ .asciz "test"
+ .size _str, 5
+ .type var,@object
+ .comm var,8,8
+ .section ".note.GNU-stack","",@progbits
+ .text
diff --git a/test/ExecutionEngine/RuntimeDyld/Mips/ELF_O32_PIC_relocations.s b/test/ExecutionEngine/RuntimeDyld/Mips/ELF_O32_PIC_relocations.s
new file mode 100644
index 0000000..a4b145a
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/Mips/ELF_O32_PIC_relocations.s
@@ -0,0 +1,50 @@
+# RUN: llvm-mc -triple=mipsel-unknown-linux -relocation-model=pic -code-model=small -filetype=obj -o %T/test_ELF_O32.o %s
+# RUN: llc -mtriple=mipsel-unknown-linux -relocation-model=pic -filetype=obj -o %T/test_ELF_ExternalFunction_O32.o %S/Inputs/ExternalFunction.ll
+# RUN: llvm-rtdyld -triple=mipsel-unknown-linux -verify -map-section test_ELF_O32.o,.text=0x1000 -map-section test_ELF_ExternalFunction_O32.o,.text=0x10000 -check=%s %T/test_ELF_O32.o %T/test_ELF_ExternalFunction_O32.o
+# RUN: llvm-mc -triple=mips-unknown-linux -relocation-model=pic -code-model=small -filetype=obj -o %T/test_ELF_O32.o %s
+# RUN: llc -mtriple=mips-unknown-linux -relocation-model=pic -filetype=obj -o %/T/test_ELF_ExternalFunction_O32.o %S/Inputs/ExternalFunction.ll
+# RUN: llvm-rtdyld -triple=mips-unknown-linux -verify -map-section test_ELF_O32.o,.text=0x1000 -map-section test_ELF_ExternalFunction_O32.o,.text=0x10000 -check=%s %T/test_ELF_O32.o %T/test_ELF_ExternalFunction_O32.o
+ .data
+# rtdyld-check: *{4}R_MIPS_32 = foo[31:0]
+ .word foo
+# rtdyld-check: *{4}(R_MIPS_32+4) = foo[31:0]
+ .4byte foo
+# rtdyld-check: *{4}(R_MIPS_PC32) = (foo - R_MIPS_PC32)[31:0]
+ .word foo-.
+# rtdyld-check: *{4}(R_MIPS_PC32 + 4) = (foo - tmp1)[31:0]
+ .4byte foo-tmp1
+ .text
+ .abicalls
+ .nan legacy
+ .text
+ .set nomicromips
+ .set nomips16
+ .set noreorder
+ .set nomacro
+ .set noat
+ .align 3
+ .globl bar
+ .type bar,@function
+# rtdyld-check: decode_operand(R_MIPS_26, 0)[27:0] = stub_addr(test_ELF_O32.o, .text, foo)[27:0]
+# rtdyld-check: decode_operand(R_MIPS_26, 0)[1:0] = 0
+ j foo
+ nop
+# rtdyld-check: decode_operand(R_MIPS_HI16, 1)[15:0] = foo[31:16]
+ lui $1, %hi(foo)
+# rtdyld-check: decode_operand(R_MIPS_LO16, 1)[15:0] = foo[15:0]
+ lui $1, %lo(foo)
+ .size bar, .-bar
diff --git a/test/ExecutionEngine/RuntimeDyld/Mips/Inputs/ExternalFunction.ll b/test/ExecutionEngine/RuntimeDyld/Mips/Inputs/ExternalFunction.ll
new file mode 100644
index 0000000..a59d68c
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/Mips/Inputs/ExternalFunction.ll
@@ -0,0 +1,4 @@
+define void @foo() {
+ ret void
diff --git a/test/ExecutionEngine/RuntimeDyld/Mips/lit.local.cfg b/test/ExecutionEngine/RuntimeDyld/Mips/lit.local.cfg
new file mode 100644
index 0000000..a3183a2
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/Mips/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'Mips' in config.root.targets:
+ config.unsupported = True
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/MachO_i386_DynNoPIC_relocations.s b/test/ExecutionEngine/RuntimeDyld/X86/MachO_i386_DynNoPIC_relocations.s
index f427b98..6b2fe95 100644
--- a/test/ExecutionEngine/RuntimeDyld/X86/MachO_i386_DynNoPIC_relocations.s
+++ b/test/ExecutionEngine/RuntimeDyld/X86/MachO_i386_DynNoPIC_relocations.s
@@ -9,9 +9,9 @@ bar:
popl %eax
# Test section difference relocation to non-lazy ptr section.
-# rtdyld-check: decode_operand(inst1, 4) = x$non_lazy_ptr - tmp0$pb
+# rtdyld-check: decode_operand(inst1, 4) = x$non_lazy_ptr - tmp0$pb + 8
- movl x$non_lazy_ptr-tmp0$pb(%eax), %eax
+ movl (x$non_lazy_ptr-tmp0$pb)+8(%eax), %eax
movl (%eax), %ebx
# Test VANILLA relocation to jump table.
diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata-darwin.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata-darwin.ll
new file mode 100644
index 0000000..7617dbd
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata-darwin.ll
@@ -0,0 +1,12 @@
+; This test checks that we are not instrumenting globals in llvm.metadata.
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+@.str_noinst = private unnamed_addr constant [4 x i8] c"aaa\00", section "llvm.metadata"
+@.str_inst = private unnamed_addr constant [4 x i8] c"aaa\00"
+; CHECK-NOT: {{asan_gen.*str_noinst}}
+; CHECK: {{asan_gen.*str_inst}}
+; CHECK: @asan.module_ctor
diff --git a/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll b/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll
index 18a86a9..ceaf0e6 100644
--- a/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll
@@ -2,17 +2,13 @@
; -asan-instrument-allocas=1
; RUN: opt < %s -asan -asan-module -asan-instrument-allocas=1 -S | FileCheck %s --check-prefix=CHECK-ALLOCA
-; RUN: opt < %s -asan -asan-module -asan-instrument-allocas=0 -S | FileCheck %s --check-prefix=CHECK-NOALLOCA
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s --check-prefix=CHECK-NOALLOCA
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
define void @foo(i32 %len) sanitize_address {
-; CHECK-ALLOCA: store i32 -892679478
-; CHECK-ALLOCA: store i32 -875836469
-; CHECK-NOALLOCA-NOT: store i32 -892679478
-; CHECK-NOALLOCA-NOT: store i32 -875836469
+; CHECK-ALLOCA: __asan_alloca_poison
+; CHECK-ALLOCA: __asan_allocas_unpoison
%0 = alloca i32, align 4
%1 = alloca i8*
store volatile i32 %len, i32* %0, align 4
diff --git a/test/Instrumentation/AddressSanitizer/undecidable-dynamic-alloca-1.ll b/test/Instrumentation/AddressSanitizer/undecidable-dynamic-alloca-1.ll
deleted file mode 100644
index c67fb50..0000000
--- a/test/Instrumentation/AddressSanitizer/undecidable-dynamic-alloca-1.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; Test that undecidable dynamic allocas are skipped by ASan.
-; RUN: opt < %s -asan -asan-module -asan-instrument-allocas=1 -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-define void @g(i64 %n) sanitize_address {
- %cmp = icmp sgt i64 %n, 100
- br i1 %cmp, label %do_alloca, label %done
-; CHECK-NOT: store i32 -892679478
- %0 = alloca i8, i64 %n, align 1
- call void @f(i8* %0)
- br label %done
- ret void
-declare void @f(i8*)
diff --git a/test/Instrumentation/InstrProfiling/PR23499.ll b/test/Instrumentation/InstrProfiling/PR23499.ll
new file mode 100644
index 0000000..5aae735
--- /dev/null
+++ b/test/Instrumentation/InstrProfiling/PR23499.ll
@@ -0,0 +1,21 @@
+;; Check that data associated with linkonce odr functions are placed in
+;; the same comdat section as their associated function.
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -instrprof -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -S | FileCheck %s
+$_Z3barIvEvv = comdat any
+@__llvm_profile_name__Z3barIvEvv = linkonce_odr hidden constant [11 x i8] c"_Z3barIvEvv", align 1
+; CHECK: @__llvm_profile_name__Z3barIvEvv = linkonce_odr hidden constant [11 x i8] c"_Z3barIvEvv", section "{{.*}}__llvm_prf_names", comdat($_Z3barIvEvv), align 1
+; CHECK: @__llvm_profile_counters__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}__llvm_prf_cnts", comdat($_Z3barIvEvv), align 8
+; CHECK: @__llvm_profile_data__Z3barIvEvv = linkonce_odr hidden constant { i32, i32, i64, i8*, i64* } { i32 11, i32 1, i64 0, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__llvm_profile_name__Z3barIvEvv, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_profile_counters__Z3barIvEvv, i32 0, i32 0) }, section "{{.*}}__llvm_prf_data", comdat($_Z3barIvEvv), align 8
+declare void @llvm.instrprof.increment(i8*, i64, i32, i32) #1
+define linkonce_odr void @_Z3barIvEvv() comdat {
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__llvm_profile_name__Z3barIvEvv, i32 0, i32 0), i64 0, i32 1, i32 0)
+ ret void
diff --git a/test/MC/AArch64/armv8.1a-atomic.s b/test/MC/AArch64/armv8.1a-atomic.s
new file mode 100644
index 0000000..bcfd3e7
--- /dev/null
+++ b/test/MC/AArch64/armv8.1a-atomic.s
@@ -0,0 +1,184 @@
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a -show-encoding < %s 2> %t | FileCheck %s
+// RUN: FileCheck --check-prefix=CHECK-ERROR <%t %s
+ .text
+ //8 bits
+ casb w0, w1, [x2]
+ casab w0, w1, [x2]
+ caslb w0, w1, [x2]
+ casalb w0, w1, [x2]
+//CHECK: casb w0, w1, [x2] // encoding: [0x41,0x7c,0xa0,0x08]
+//CHECK: casab w0, w1, [x2] // encoding: [0x41,0x7c,0xe0,0x08]
+//CHECK: caslb w0, w1, [x2] // encoding: [0x41,0xfc,0xa0,0x08]
+//CHECK: casalb w0, w1, [x2] // encoding: [0x41,0xfc,0xe0,0x08]
+ casb w0, w1, [w2]
+ casalb x0, x1, [x2]
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR: casb w0, w1, [w2]
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR: casalb x0, x1, [x2]
+ //16 bits
+ cash w0, w1, [x2]
+ casah w0, w1, [x2]
+ caslh w0, w1, [x2]
+ casalh w0, w1, [x2]
+//CHECK: cash w0, w1, [x2] // encoding: [0x41,0x7c,0xa0,0x48]
+//CHECK: casah w0, w1, [x2] // encoding: [0x41,0x7c,0xe0,0x48]
+//CHECK: caslh w0, w1, [x2] // encoding: [0x41,0xfc,0xa0,0x48]
+//CHECK: casalh w0, w1, [x2] // encoding: [0x41,0xfc,0xe0,0x48]
+ //32 bits
+ cas w0, w1, [x2]
+ casa w0, w1, [x2]
+ casl w0, w1, [x2]
+ casal w0, w1, [x2]
+//CHECK: cas w0, w1, [x2] // encoding: [0x41,0x7c,0xa0,0x88]
+//CHECK: casa w0, w1, [x2] // encoding: [0x41,0x7c,0xe0,0x88]
+//CHECK: casl w0, w1, [x2] // encoding: [0x41,0xfc,0xa0,0x88]
+//CHECK: casal w0, w1, [x2] // encoding: [0x41,0xfc,0xe0,0x88]
+ cas w0, w1, [w2]
+ casl w0, x1, [x2]
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR: cas w0, w1, [w2]
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR: casl w0, x1, [x2]
+ //64 bits
+ cas x0, x1, [x2]
+ casa x0, x1, [x2]
+ casl x0, x1, [x2]
+ casal x0, x1, [x2]
+//CHECK: cas x0, x1, [x2] // encoding: [0x41,0x7c,0xa0,0xc8]
+//CHECK: casa x0, x1, [x2] // encoding: [0x41,0x7c,0xe0,0xc8]
+//CHECK: casl x0, x1, [x2] // encoding: [0x41,0xfc,0xa0,0xc8]
+//CHECK: casal x0, x1, [x2] // encoding: [0x41,0xfc,0xe0,0xc8]
+ casa x0, x1, [w2]
+ casal x0, w1, [x2]
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR: casa x0, x1, [w2]
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR: casal x0, w1, [x2]
+ // LD<OP> instructions
+ ldadda x0, x1, [x2]
+ ldclrl x0, x1, [x2]
+ ldeoral x0, x1, [x2]
+ ldset x0, x1, [x2]
+ ldsmaxa w0, w1, [x2]
+ ldsminlb w0, w1, [x2]
+ ldumaxalh w0, w1, [x2]
+ ldumin w0, w1, [x2]
+ ldsminb w2, w3, [x5]
+//CHECK: ldadda x0, x1, [x2] // encoding: [0x41,0x00,0xa0,0xf8]
+//CHECK: ldclrl x0, x1, [x2] // encoding: [0x41,0x10,0x60,0xf8]
+//CHECK: ldeoral x0, x1, [x2] // encoding: [0x41,0x20,0xe0,0xf8]
+//CHECK: ldset x0, x1, [x2] // encoding: [0x41,0x30,0x20,0xf8]
+//CHECK: ldsmaxa w0, w1, [x2] // encoding: [0x41,0x40,0xa0,0xb8]
+//CHECK: ldsminlb w0, w1, [x2] // encoding: [0x41,0x50,0x60,0x38]
+//CHECK: ldumaxalh w0, w1, [x2] // encoding: [0x41,0x60,0xe0,0x78]
+//CHECK: ldumin w0, w1, [x2] // encoding: [0x41,0x70,0x20,0xb8]
+//CHECK: ldsminb w2, w3, [x5] // encoding: [0xa3,0x50,0x22,0x38]
+ // ST<OP> instructions: aliases to LD<OP>
+ stADDlb w0, [x2]
+ stclrlh w0, [x2]
+ steorl w0, [x2]
+ stsetl x0, [x2]
+ stsmaxb w0, [x2]
+ stsminh w0, [x2]
+ stumax w0, [x2]
+ stumin x0, [x2]
+ stsminl x29, [sp]
+//CHECK: staddlb w0, [x2] // encoding: [0x5f,0x00,0x60,0x38]
+//CHECK: stclrlh w0, [x2] // encoding: [0x5f,0x10,0x60,0x78]
+//CHECK: steorl w0, [x2] // encoding: [0x5f,0x20,0x60,0xb8]
+//CHECK: stsetl x0, [x2] // encoding: [0x5f,0x30,0x60,0xf8]
+//CHECK: stsmaxb w0, [x2] // encoding: [0x5f,0x40,0x20,0x38]
+//CHECK: stsminh w0, [x2] // encoding: [0x5f,0x50,0x20,0x78]
+//CHECK: stumax w0, [x2] // encoding: [0x5f,0x60,0x20,0xb8]
+//CHECK: stumin x0, [x2] // encoding: [0x5f,0x70,0x20,0xf8]
+//CHECK: stsminl x29, [sp] // encoding: [0xff,0x53,0x7d,0xf8]
+ ldsmax x0, x1, [w2]
+ ldeorl w0, w1, [w2]
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR: ldsmax x0, x1, [w2]
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR: ldeorl w0, w1, [w2]
+ //SWP instruction
+ swp x0, x1, [x2]
+ swpb w0, w1, [x2]
+ swplh w0, w1, [x2]
+ swpal x0, x1, [sp]
+//CHECK: swp x0, x1, [x2] // encoding: [0x41,0x80,0x20,0xf8]
+//CHECK: swpb w0, w1, [x2] // encoding: [0x41,0x80,0x20,0x38]
+//CHECK: swplh w0, w1, [x2] // encoding: [0x41,0x80,0x60,0x78]
+//CHECK: swpal x0, x1, [sp] // encoding: [0xe1,0x83,0xe0,0xf8]
+ swp x0, x1, [w2]
+ swp x0, x1, [xzr]
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR: swp x0, x1, [w2]
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR: swp x0, x1, [xzr]
+ //CASP instruction
+ casp x0, x1, x2, x3, [x4]
+ casp w0, w1, w2, w3, [x4]
+//CHECK: casp x0, x1, x2, x3, [x4] // encoding: [0x82,0x7c,0x20,0x48]
+//CHECK: casp w0, w1, w2, w3, [x4] // encoding: [0x82,0x7c,0x20,0x08]
+ casp x1, x2, x4, x5, [x6]
+ casp x0, x1, x3, x4, [x5]
+ casp x0, x2, x4, x5, [x6]
+ casp x0, x1, x2, x4, [x5]
+ casp x0, w1, x2, x3, [x5]
+ casp w0, x1, x2, x3, [x5]
+ casp w0, x1, w2, w3, [x5]
+ casp x0, x1, w2, w3, [x5]
+//CHECK-ERROR: error: expected first even register of a consecutive same-size even/odd register pair
+//CHECK-ERROR: casp x1, x2, x4, x5, [x6]
+//CHECK-ERROR: error: expected first even register of a consecutive same-size even/odd register pair
+//CHECK-ERROR: casp x0, x1, x3, x4, [x5]
+//CHECK-ERROR: error: expected second odd register of a consecutive same-size even/odd register pair
+//CHECK-ERROR: casp x0, x2, x4, x5, [x6]
+//CHECK-ERROR: error: expected second odd register of a consecutive same-size even/odd register pair
+//CHECK-ERROR: casp x0, x1, x2, x4, [x5]
+//CHECK-ERROR: error: expected second odd register of a consecutive same-size even/odd register pair
+//CHECK-ERROR: casp x0, w1, x2, x3, [x5]
+//CHECK-ERROR: error: expected second odd register of a consecutive same-size even/odd register pair
+//CHECK-ERROR: casp w0, x1, x2, x3, [x5]
+//CHECK-ERROR: error: expected second odd register of a consecutive same-size even/odd register pair
+//CHECK-ERROR: casp w0, x1, w2, w3, [x5]
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR: casp x0, x1, w2, w3, [x5]
diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s
index 1d7ba71..bf7db13 100644
--- a/test/MC/AArch64/basic-a64-diagnostics.s
+++ b/test/MC/AArch64/basic-a64-diagnostics.s
@@ -3494,6 +3494,7 @@
msr ID_MMFR1_EL1, x12
msr ID_MMFR2_EL1, x12
msr ID_MMFR3_EL1, x12
+ msr ID_MMFR4_EL1, x12
msr ID_ISAR0_EL1, x12
msr ID_ISAR1_EL1, x12
msr ID_ISAR2_EL1, x12
@@ -3587,6 +3588,9 @@
// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
// CHECK-ERROR-NEXT: error: expected writable system register or pstate
diff --git a/test/MC/AArch64/basic-a64-instructions.s b/test/MC/AArch64/basic-a64-instructions.s
index 75c86ef..5d33a4f 100644
--- a/test/MC/AArch64/basic-a64-instructions.s
+++ b/test/MC/AArch64/basic-a64-instructions.s
@@ -4306,6 +4306,7 @@ _func:
mrs x9, ID_MMFR1_EL1
mrs x9, ID_MMFR2_EL1
mrs x9, ID_MMFR3_EL1
+ mrs x9, ID_MMFR4_EL1
mrs x9, ID_ISAR0_EL1
mrs x9, ID_ISAR1_EL1
mrs x9, ID_ISAR2_EL1
@@ -4606,6 +4607,7 @@ _func:
// CHECK: mrs x9, {{id_mmfr1_el1|ID_MMFR1_EL1}} // encoding: [0xa9,0x01,0x38,0xd5]
// CHECK: mrs x9, {{id_mmfr2_el1|ID_MMFR2_EL1}} // encoding: [0xc9,0x01,0x38,0xd5]
// CHECK: mrs x9, {{id_mmfr3_el1|ID_MMFR3_EL1}} // encoding: [0xe9,0x01,0x38,0xd5]
+// CHECK: mrs x9, {{id_mmfr4_el1|ID_MMFR4_EL1}} // encoding: [0xc9,0x02,0x38,0xd5]
// CHECK: mrs x9, {{id_isar0_el1|ID_ISAR0_EL1}} // encoding: [0x09,0x02,0x38,0xd5]
// CHECK: mrs x9, {{id_isar1_el1|ID_ISAR1_EL1}} // encoding: [0x29,0x02,0x38,0xd5]
// CHECK: mrs x9, {{id_isar2_el1|ID_ISAR2_EL1}} // encoding: [0x49,0x02,0x38,0xd5]
diff --git a/test/MC/AArch64/case-insen-reg-names.s b/test/MC/AArch64/case-insen-reg-names.s
new file mode 100644
index 0000000..b31ab67
--- /dev/null
+++ b/test/MC/AArch64/case-insen-reg-names.s
@@ -0,0 +1,8 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s
+fadd v0.2d, v5.2d, v6.2d
+fadd V0.2d, V5.2d, V6.2d
+fadd v0.2d, V5.2d, v6.2d
+// CHECK: fadd v0.2d, v5.2d, v6.2d // encoding: [0xa0,0xd4,0x66,0x4e]
+// CHECK: fadd v0.2d, v5.2d, v6.2d // encoding: [0xa0,0xd4,0x66,0x4e]
+// CHECK: fadd v0.2d, v5.2d, v6.2d // encoding: [0xa0,0xd4,0x66,0x4e]
diff --git a/test/MC/ARM/arm-elf-symver.s b/test/MC/ARM/arm-elf-symver.s
index 4303540..ce9c4fe 100644
--- a/test/MC/ARM/arm-elf-symver.s
+++ b/test/MC/ARM/arm-elf-symver.s
@@ -78,7 +78,7 @@ global1:
@ CHECK-NEXT: Section: .text
@ CHECK-NEXT: Symbol {
-@ CHECK-NEXT: Name: .text (0)
+@ CHECK-NEXT: Name: (0)
@ CHECK-NEXT: Value: 0x0
@ CHECK-NEXT: Size: 0
@ CHECK-NEXT: Binding: Local (0x0)
@@ -87,22 +87,22 @@ global1:
@ CHECK-NEXT: Section: .text
@ CHECK-NEXT: Symbol {
-@ CHECK-NEXT: Name: .data (0)
+@ CHECK-NEXT: Name: bar2@zed
@ CHECK-NEXT: Value: 0x0
@ CHECK-NEXT: Size: 0
-@ CHECK-NEXT: Binding: Local (0x0)
-@ CHECK-NEXT: Type: Section (0x3)
+@ CHECK-NEXT: Binding: Global (0x1)
+@ CHECK-NEXT: Type: None (0x0)
@ CHECK-NEXT: Other: 0
-@ CHECK-NEXT: Section: .data
+@ CHECK-NEXT: Section: Undefined (0x0)
@ CHECK-NEXT: Symbol {
-@ CHECK-NEXT: Name: .bss (0)
+@ CHECK-NEXT: Name: bar6@zed
@ CHECK-NEXT: Value: 0x0
@ CHECK-NEXT: Size: 0
-@ CHECK-NEXT: Binding: Local (0x0)
-@ CHECK-NEXT: Type: Section (0x3)
+@ CHECK-NEXT: Binding: Global (0x1)
+@ CHECK-NEXT: Type: None (0x0)
@ CHECK-NEXT: Other: 0
-@ CHECK-NEXT: Section: .bss
+@ CHECK-NEXT: Section: Undefined (0x0)
@ CHECK-NEXT: Symbol {
@ CHECK-NEXT: Name: g1@@zed
@@ -122,22 +122,4 @@ global1:
@ CHECK-NEXT: Other: 0
@ CHECK-NEXT: Section: .text
-@ CHECK-NEXT: Symbol {
-@ CHECK-NEXT: Name: bar2@zed
-@ CHECK-NEXT: Value: 0x0
-@ CHECK-NEXT: Size: 0
-@ CHECK-NEXT: Binding: Global (0x1)
-@ CHECK-NEXT: Type: None (0x0)
-@ CHECK-NEXT: Other: 0
-@ CHECK-NEXT: Section: Undefined (0x0)
-@ CHECK-NEXT: Symbol {
-@ CHECK-NEXT: Name: bar6@zed
-@ CHECK-NEXT: Value: 0x0
-@ CHECK-NEXT: Size: 0
-@ CHECK-NEXT: Binding: Global (0x1)
-@ CHECK-NEXT: Type: None (0x0)
-@ CHECK-NEXT: Other: 0
-@ CHECK-NEXT: Section: Undefined (0x0)
diff --git a/test/MC/ARM/directive-arch-armv2.s b/test/MC/ARM/directive-arch-armv2.s
index 40857ca..f6dc20c 100644
--- a/test/MC/ARM/directive-arch-armv2.s
+++ b/test/MC/ARM/directive-arch-armv2.s
@@ -20,7 +20,7 @@
@ CHECK-ATTR: Attribute {
@ CHECK-ATTR: TagName: CPU_arch
-@ CHECK-ATTR: Description: ARM v4
+@ CHECK-ATTR: Description: Pre-v4
@ CHECK-ATTR: Attribute {
@ CHECK-ATTR: TagName: ARM_ISA_use
diff --git a/test/MC/ARM/directive-arch-armv2a.s b/test/MC/ARM/directive-arch-armv2a.s
index 62c2ace..bb0a693 100644
--- a/test/MC/ARM/directive-arch-armv2a.s
+++ b/test/MC/ARM/directive-arch-armv2a.s
@@ -20,7 +20,7 @@
@ CHECK-ATTR: Attribute {
@ CHECK-ATTR: TagName: CPU_arch
-@ CHECK-ATTR: Description: ARM v4
+@ CHECK-ATTR: Description: Pre-v4
@ CHECK-ATTR: Attribute {
@ CHECK-ATTR: TagName: ARM_ISA_use
diff --git a/test/MC/ARM/directive-arch-armv3.s b/test/MC/ARM/directive-arch-armv3.s
index 41cce65..aeec638 100644
--- a/test/MC/ARM/directive-arch-armv3.s
+++ b/test/MC/ARM/directive-arch-armv3.s
@@ -20,7 +20,7 @@
@ CHECK-ATTR: Attribute {
@ CHECK-ATTR: TagName: CPU_arch
-@ CHECK-ATTR: Description: ARM v4
+@ CHECK-ATTR: Description: Pre-v4
@ CHECK-ATTR: Attribute {
@ CHECK-ATTR: TagName: ARM_ISA_use
diff --git a/test/MC/ARM/directive-arch-armv3m.s b/test/MC/ARM/directive-arch-armv3m.s
index 8041da2..fda8db5 100644
--- a/test/MC/ARM/directive-arch-armv3m.s
+++ b/test/MC/ARM/directive-arch-armv3m.s
@@ -20,7 +20,7 @@
@ CHECK-ATTR: Attribute {
@ CHECK-ATTR: TagName: CPU_arch
-@ CHECK-ATTR: Description: ARM v4
+@ CHECK-ATTR: Description: Pre-v4
@ CHECK-ATTR: Attribute {
@ CHECK-ATTR: TagName: ARM_ISA_use
diff --git a/test/MC/ARM/directive-fpu-multiple.s b/test/MC/ARM/directive-fpu-multiple.s
index de2baaf..66fc274 100644
--- a/test/MC/ARM/directive-fpu-multiple.s
+++ b/test/MC/ARM/directive-fpu-multiple.s
@@ -1,12 +1,28 @@
@ Check multiple .fpu directives.
@ The later .fpu directive should overwrite the earlier one.
-@ See also: directive-fpu-multiple2.s.
+@ We also check here that all the .fpu directives that we expect to work do work
@ RUN: llvm-mc -triple arm-eabi -filetype obj %s | llvm-readobj -arm-attributes \
@ RUN: | FileCheck %s -check-prefix CHECK-ATTR
+ .fpu none
+ .fpu vfp
+ .fpu vfpv2
+ .fpu vfpv3
+ .fpu vfpv3-d16
+ .fpu vfpv4
+ .fpu vfpv4-d16
+ .fpu fpv4-sp-d16
+ .fpu fpv5-d16
+ .fpu fpv5-sp-d16
+ .fpu fp-armv8
.fpu neon
+ .fpu neon-vfpv4
+ .fpu neon-fp-armv8
+ .fpu crypto-neon-fp-armv8
+ .fpu softvfp
.fpu vfpv4
@ CHECK-ATTR: FileAttributes {
diff --git a/test/MC/ARM/elf-movt.s b/test/MC/ARM/elf-movt.s
index 0fd7775..9b46caf 100644
--- a/test/MC/ARM/elf-movt.s
+++ b/test/MC/ARM/elf-movt.s
@@ -49,10 +49,6 @@ barf: @ @barf
@ OBJ-NEXT: AddressAlignment: 4
@ OBJ-NEXT: EntrySize: 8
@ OBJ-NEXT: Relocations [
-@ OBJ-NEXT: SectionData (
-@ OBJ-NEXT: 0000: 00000000 2D060000 04000000 2E060000 |....-...........|
-@ OBJ-NEXT: )
-@ OBJ-NEXT: }
diff --git a/test/MC/AsmParser/defsym.s b/test/MC/AsmParser/defsym.s
new file mode 100644
index 0000000..06981f5
--- /dev/null
+++ b/test/MC/AsmParser/defsym.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc -filetype=obj -triple=i386-unknown-elf -defsym a=7 -defsym b=11 %s | llvm-objdump -t - | FileCheck %s
+.ifndef a
+.if a<>7
+.ifndef b
+.if b<>11
+# CHECK: 00000007 *ABS* 00000000 a
+# CHECK: 0000000b *ABS* 00000000 b \ No newline at end of file
diff --git a/test/MC/AsmParser/defsym_error1.s b/test/MC/AsmParser/defsym_error1.s
new file mode 100644
index 0000000..8725778
--- /dev/null
+++ b/test/MC/AsmParser/defsym_error1.s
@@ -0,0 +1,2 @@
+# RUN: not llvm-mc -filetype=obj -triple=i386-unknown-elf -defsym aaoeuaoeu %s 2>&1 | FileCheck %s
+# CHECK: defsym must be of the form: sym=value
diff --git a/test/MC/AsmParser/defsym_error2.s b/test/MC/AsmParser/defsym_error2.s
new file mode 100644
index 0000000..ec4cc79
--- /dev/null
+++ b/test/MC/AsmParser/defsym_error2.s
@@ -0,0 +1,2 @@
+# RUN: not llvm-mc -filetype=obj -triple=i386-unknown-elf -defsym a=a %s 2>&1 | FileCheck %s
+# CHECK: error: Value is not an integer: a
diff --git a/test/MC/COFF/cross-section-relative.ll b/test/MC/COFF/cross-section-relative.ll
index 18823f8..6b7a3d7 100644
--- a/test/MC/COFF/cross-section-relative.ll
+++ b/test/MC/COFF/cross-section-relative.ll
@@ -1,7 +1,5 @@
; Verify the assembler produces the expected expressions
; RUN: llc -mtriple=x86_64-pc-win32 %s -o - | FileCheck %s
-; Verify the .fix data section conveys the right offsets and the right relocations
-; RUN: llc -mtriple=x86_64-pc-win32 -filetype=obj %s -o - | llvm-readobj -relocations -expand-relocs -sections -section-data | FileCheck %s --check-prefix=READOBJ
;;;; some globals
@@ -37,61 +35,3 @@
i64 256,
i32 trunc(i64 sub(i64 ptrtoint(i32* @g3 to i64), i64 ptrtoint(i32* getelementptr inbounds (%struct.EEType, %struct.EEType* @t6, i32 0, i32 2) to i64)) to i32 )
}, section ".fix"
-; READOBJ: Section {
-; READOBJ: Number: 5
-; READOBJ: Name: .fix (2E 66 69 78 00 00 00 00)
-; READOBJ: VirtualSize: 0x0
-; READOBJ: VirtualAddress: 0x0
-; READOBJ: RawDataSize: 56
-; READOBJ: PointerToRawData: 0xEC
-; READOBJ: PointerToRelocations: 0x124
-; READOBJ: PointerToLineNumbers: 0x0
-; READOBJ: RelocationCount: 6
-; READOBJ: LineNumberCount: 0
-; READOBJ: Characteristics [ (0xC0500040)
-; READOBJ: SectionData (
-; READOBJ: 0000: 10000000 00000000 0C000000 00000000 |................|
-; READOBJ: 0010: 08000000 00000000 0C000000 00000000 |................|
-; READOBJ: 0020: 01020000 00000000 00010000 00000000 |................|
-; READOBJ: 0030: 0C000000 00000000 |........|
-; READOBJ: Relocations [
-; READOBJ: Section (5) .fix {
-; READOBJ: Relocation {
-; READOBJ: Offset: 0x0
-; READOBJ: Symbol: .rdata
-; READOBJ: Relocation {
-; READOBJ: Offset: 0x8
-; READOBJ: Symbol: .rdata
-; READOBJ: Relocation {
-; READOBJ: Offset: 0x10
-; READOBJ: Symbol: .rdata
-; READOBJ: Relocation {
-; READOBJ: Offset: 0x18
-; READOBJ: Symbol: .rdata
-; READOBJ: Relocation {
-; READOBJ: Offset: 0x1C
-; READOBJ: Symbol: g3
-; READOBJ: Relocation {
-; READOBJ: Offset: 0x30
-; READOBJ: Symbol: .rdata
diff --git a/test/MC/COFF/cross-section-relative.s b/test/MC/COFF/cross-section-relative.s
new file mode 100644
index 0000000..dd94b0a
--- /dev/null
+++ b/test/MC/COFF/cross-section-relative.s
@@ -0,0 +1,118 @@
+// Verify the .fix data section conveys the right offsets and the right relocations
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s -o - | llvm-readobj -relocations -expand-relocs -sections -section-data | FileCheck %s --check-prefix=READOBJ
+ .text
+ .section .rdata,"dr"
+ .globl g1 # @g1
+ .align 4
+ .long 1 # 0x1
+ .globl g2 # @g2
+ .align 4
+ .long 2 # 0x2
+ .globl g3 # @g3
+ .align 4
+ .long 3 # 0x3
+ .globl g4 # @g4
+ .align 4
+ .long 4 # 0x4
+ .section .fix,"dw"
+ .globl t1 # @t1
+ .align 8
+ .quad (g3-t1)+4
+ .globl t2 # @t2
+ .align 8
+ .quad g3-t2
+ .globl t3 # @t3
+ .align 8
+ .quad (g3-t3)-4
+ .globl t4 # @t4
+ .align 4
+ .long g3-t4
+ .globl t5 # @t5
+ .align 4
+ .long g3@IMGREL
+ .globl t6 # @t6
+ .align 16
+ .ascii "\001\002"
+ .zero 6
+ .quad 256 # 0x100
+ .long g3-(t6+16)
+ .zero 4
+// READOBJ: Section {
+// READOBJ: Number: 5
+// READOBJ: Name: .fix (2E 66 69 78 00 00 00 00)
+// READOBJ: VirtualSize: 0x0
+// READOBJ: VirtualAddress: 0x0
+// READOBJ: RawDataSize: 56
+// READOBJ: PointerToRawData: 0xEC
+// READOBJ: PointerToRelocations: 0x124
+// READOBJ: PointerToLineNumbers: 0x0
+// READOBJ: RelocationCount: 6
+// READOBJ: LineNumberCount: 0
+// READOBJ: Characteristics [ (0xC0500040)
+// READOBJ: IMAGE_SCN_MEM_READ (0x40000000)
+// READOBJ: ]
+// READOBJ: SectionData (
+// READOBJ: 0000: 10000000 00000000 0C000000 00000000 |................|
+// READOBJ: 0010: 08000000 00000000 0C000000 00000000 |................|
+// READOBJ: 0020: 01020000 00000000 00010000 00000000 |................|
+// READOBJ: 0030: 0C000000 00000000 |........|
+// READOBJ: )
+// READOBJ: }
+// READOBJ: ]
+// READOBJ: Relocations [
+// READOBJ: Section (5) .fix {
+// READOBJ: Relocation {
+// READOBJ: Offset: 0x0
+// READOBJ: Type: IMAGE_REL_AMD64_REL32 (4)
+// READOBJ: Symbol: .rdata
+// READOBJ: }
+// READOBJ: Relocation {
+// READOBJ: Offset: 0x8
+// READOBJ: Type: IMAGE_REL_AMD64_REL32 (4)
+// READOBJ: Symbol: .rdata
+// READOBJ: }
+// READOBJ: Relocation {
+// READOBJ: Offset: 0x10
+// READOBJ: Type: IMAGE_REL_AMD64_REL32 (4)
+// READOBJ: Symbol: .rdata
+// READOBJ: }
+// READOBJ: Relocation {
+// READOBJ: Offset: 0x18
+// READOBJ: Type: IMAGE_REL_AMD64_REL32 (4)
+// READOBJ: Symbol: .rdata
+// READOBJ: }
+// READOBJ: Relocation {
+// READOBJ: Offset: 0x1C
+// READOBJ: Symbol: g3
+// READOBJ: }
+// READOBJ: Relocation {
+// READOBJ: Offset: 0x30
+// READOBJ: Type: IMAGE_REL_AMD64_REL32 (4)
+// READOBJ: Symbol: .rdata
+// READOBJ: }
diff --git a/test/MC/Disassembler/AArch64/armv8.1a-atomic.txt b/test/MC/Disassembler/AArch64/armv8.1a-atomic.txt
new file mode 100644
index 0000000..b20fabb8
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/armv8.1a-atomic.txt
@@ -0,0 +1,87 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a --disassemble < %s | FileCheck %s
+# CHECK: casb w0, w1, [x2]
+# CHECK: casab w0, w1, [x2]
+# CHECK: caslb w0, w1, [x2]
+# CHECK: casalb w0, w1, [x2]
+# CHECK: cash w0, w1, [x2]
+# CHECK: casah w0, w1, [x2]
+# CHECK: caslh w0, w1, [x2]
+# CHECK: casalh w0, w1, [x2]
+# CHECK: cas w0, w1, [x2]
+# CHECK: casa w0, w1, [x2]
+# CHECK: casl w0, w1, [x2]
+# CHECK: casal w0, w1, [x2]
+# CHECK: cas x0, x1, [x2]
+# CHECK: casa x0, x1, [x2]
+# CHECK: casl x0, x1, [x2]
+# CHECK: casal x0, x1, [x2]
+# CHECK: swp x0, x1, [x2]
+# CHECK: swpb w0, w1, [x2]
+# CHECK: swplh w0, w1, [x2]
+# CHECK: swpal x0, x1, [sp]
+# CHECK: ldadda x0, x1, [x2]
+# CHECK: ldclrl x0, x1, [x2]
+# CHECK: ldeoral x0, x1, [x2]
+# CHECK: ldset x0, x1, [x2]
+# CHECK: ldsmaxa w0, w1, [x2]
+# CHECK: ldsminlb w0, w1, [x2]
+# CHECK: ldumaxalh w0, w1, [x2]
+# CHECK: ldumin w0, w1, [x2]
+# CHECK: ldsminalh w7, w11, [x13]
+# CHECK: staddlb w0, [x2]
+# CHECK: stclrlh w0, [x2]
+# CHECK: steorl w0, [x2]
+# CHECK: stsetl x0, [x2]
+# CHECK: stsmaxb w0, [x2]
+# CHECK: stsminh w0, [x2]
+# CHECK: stumax w0, [x2]
+# CHECK: stumin x0, [x2]
+# CHECK: stsminl x29, [sp]
+# CHECK: casp x0, x1, x2, x3, [x4]
+# CHECK: casp w0, w1, w2, w3, [x4]
diff --git a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
index c777f7a..615d9ba 100644
--- a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
+++ b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
@@ -3414,6 +3414,7 @@
# CHECK: mrs x9, {{id_mmfr1_el1|ID_MMFR1_EL1}}
# CHECK: mrs x9, {{id_mmfr2_el1|ID_MMFR2_EL1}}
# CHECK: mrs x9, {{id_mmfr3_el1|ID_MMFR3_EL1}}
+# CHECK: mrs x9, {{id_mmfr4_el1|ID_MMFR4_EL1}}
# CHECK: mrs x9, {{id_isar0_el1|ID_ISAR0_EL1}}
# CHECK: mrs x9, {{id_isar1_el1|ID_ISAR1_EL1}}
# CHECK: mrs x9, {{id_isar2_el1|ID_ISAR2_EL1}}
@@ -3968,6 +3969,7 @@
0xa9 0x1 0x38 0xd5
0xc9 0x1 0x38 0xd5
0xe9 0x1 0x38 0xd5
+0xc9 0x2 0x38 0xd5
0x9 0x2 0x38 0xd5
0x29 0x2 0x38 0xd5
0x49 0x2 0x38 0xd5
diff --git a/test/MC/Disassembler/PowerPC/vsx.txt b/test/MC/Disassembler/PowerPC/vsx.txt
index 04b2eeb..6f4ba6f 100644
--- a/test/MC/Disassembler/PowerPC/vsx.txt
+++ b/test/MC/Disassembler/PowerPC/vsx.txt
@@ -90,6 +90,12 @@
# CHECK: xsmaddmdp 7, 63, 27
0xf0 0xff 0xd9 0x4c
+# CHECK: xsmaddasp 7, 63, 27
+0xf0 0xff 0xd8 0x0c
+# CHECK: xsmaddmsp 7, 63, 27
+0xf0 0xff 0xd8 0x4c
# CHECK: xsmaxdp 7, 63, 27
0xf0 0xff 0xdd 0x04
@@ -102,6 +108,12 @@
# CHECK: xsmsubmdp 7, 63, 27
0xf0 0xff 0xd9 0xcc
+# CHECK: xsmsubasp 7, 63, 27
+0xf0 0xff 0xd8 0x8c
+# CHECK: xsmsubmsp 7, 63, 27
+0xf0 0xff 0xd8 0xcc
# CHECK: xsmulsp 7, 63, 27
0xf0 0xff 0xd8 0x84
@@ -126,6 +138,18 @@
# CHECK: xsnmsubmdp 7, 63, 27
0xf0 0xff 0xdd 0xcc
+# CHECK: xsnmaddasp 7, 63, 27
+0xf0 0xff 0xdc 0x0c
+# CHECK: xsnmaddmsp 7, 63, 27
+0xf0 0xff 0xdc 0x4c
+# CHECK: xsnmsubasp 7, 63, 27
+0xf0 0xff 0xdc 0x8c
+# CHECK: xsnmsubmsp 7, 63, 27
+0xf0 0xff 0xdc 0xcc
# CHECK: xsrdpi 7, 27
0xf0 0xe0 0xd9 0x24
diff --git a/test/MC/ELF/alias.s b/test/MC/ELF/alias.s
index 0ab6dd4..df39fd4 100644
--- a/test/MC/ELF/alias.s
+++ b/test/MC/ELF/alias.s
@@ -107,33 +107,25 @@ leaq .Llocal1(%rip), %rdi
// CHECK-NEXT: Section: .text
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .text (0)
+// CHECK-NEXT: Name: (0)
// CHECK-NOT: Symbol {
// CHECK: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .data (0)
-// CHECK-NOT: Symbol {
-// CHECK: }
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .bss (0)
-// CHECK-NOT: Symbol {
-// CHECK: }
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar3
+// CHECK-NEXT: Name: bar2
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
// CHECK-NEXT: Type: None
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .text
+// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar2
+// CHECK-NEXT: Name: bar3
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
// CHECK-NEXT: Type: None
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: Undefined (0x0)
+// CHECK-NEXT: Section: .text
diff --git a/test/MC/ELF/basic-elf-32.s b/test/MC/ELF/basic-elf-32.s
index 16266af..1036b04 100644
--- a/test/MC/ELF/basic-elf-32.s
+++ b/test/MC/ELF/basic-elf-32.s
@@ -54,20 +54,6 @@ main: # @main
// CHECK: ]
// CHECK: Symbols [
-// CHECK: Symbol {
-// CHECK: Binding: Local
-// CHECK: Type: Section
-// CHECK: }
-// CHECK: Symbol {
-// CHECK: Binding: Local
-// CHECK: Type: Section
-// CHECK: }
-// CHECK: Symbol {
-// CHECK: Binding: Local
-// CHECK: Type: Section
-// CHECK: }
// CHECK: Symbol {
// CHECK: Name: main
diff --git a/test/MC/ELF/basic-elf-64.s b/test/MC/ELF/basic-elf-64.s
index d99125e..b93f9ae 100644
--- a/test/MC/ELF/basic-elf-64.s
+++ b/test/MC/ELF/basic-elf-64.s
@@ -58,16 +58,6 @@ main: # @main
// CHECK: Type: Section
// CHECK: Symbol {
-// CHECK: Binding: Local
-// CHECK: Type: Section
-// CHECK: }
-// CHECK: Symbol {
-// CHECK: Binding: Local
-// CHECK: Type: Section
-// CHECK: }
-// CHECK: Symbol {
// CHECK: Name: main
// CHECK: Binding: Global
// CHECK: Type: Function
diff --git a/test/MC/ELF/comdat-dup-group-name.s b/test/MC/ELF/comdat-dup-group-name.s
index a2dc4cc..e52f3dc 100644
--- a/test/MC/ELF/comdat-dup-group-name.s
+++ b/test/MC/ELF/comdat-dup-group-name.s
@@ -24,15 +24,6 @@
// CHECK: Section: .group (0x7)
-// CHECK: Name: .foo
-// CHECK-NOT: }
-// CHECK: Section: .foo (0x6)
-// CHECK: Name: .foo
-// CHECK-NOT: }
-// CHECK: Section: .foo (0x8)
.section .foo,"axG",@progbits,f1,comdat
diff --git a/test/MC/ELF/comdat.s b/test/MC/ELF/comdat.s
index e71dea0..18da17e 100644
--- a/test/MC/ELF/comdat.s
+++ b/test/MC/ELF/comdat.s
@@ -46,7 +46,7 @@
// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 12
// CHECK-NEXT: Link:
-// CHECK-NEXT: Info: 10
+// CHECK-NEXT: Info: 3
// CHECK-NEXT: AddressAlignment: 4
// CHECK-NEXT: EntrySize: 4
// CHECK-NEXT: SectionData (
diff --git a/test/MC/ELF/common-error3.s b/test/MC/ELF/common-error3.s
new file mode 100644
index 0000000..a84779e
--- /dev/null
+++ b/test/MC/ELF/common-error3.s
@@ -0,0 +1,5 @@
+# RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux %s 2>&1 | FileCheck %s
+# CHECK: Symbol: C redeclared as different type
+ .comm C,4,4
+ .comm C,8,4 \ No newline at end of file
diff --git a/test/MC/ELF/common-redeclare.s b/test/MC/ELF/common-redeclare.s
new file mode 100644
index 0000000..f8ee17d
--- /dev/null
+++ b/test/MC/ELF/common-redeclare.s
@@ -0,0 +1,5 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux %s | llvm-objdump -t - | FileCheck %s
+# CHECK: 0000000000000004 g *COM* 00000004 C
+ .comm C,4,4
+ .comm C,4,4 \ No newline at end of file
diff --git a/test/MC/ELF/empty.s b/test/MC/ELF/empty.s
index 6ddbd8c..7b686fe 100644
--- a/test/MC/ELF/empty.s
+++ b/test/MC/ELF/empty.s
@@ -78,9 +78,9 @@
// CHECK-NEXT: Address: 0x0
// CHECK-NEXT: Offset:
-// CHECK-NEXT: Size: 96
+// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link:
-// CHECK-NEXT: Info: 4
+// CHECK-NEXT: Info: 1
// CHECK-NEXT: AddressAlignment: 8
// CHECK-NEXT: EntrySize: 24
diff --git a/test/MC/ELF/got.s b/test/MC/ELF/got.s
index 30114b7..3fe3e4a 100644
--- a/test/MC/ELF/got.s
+++ b/test/MC/ELF/got.s
@@ -1,7 +1,7 @@
// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
-// Test that this produces a R_X86_64_GOT32 and that we have an undefined
-// reference to _GLOBAL_OFFSET_TABLE_.
+// Test that this produces the correct relocations R_X86_64_GOT32 and that we,
+// unlike gas, don't create a _GLOBAL_OFFSET_TABLE_ symbol as a side effect.
movl foo@GOT, %eax
movl foo@GOTPCREL(%rip), %eax
@@ -13,8 +13,5 @@
-// CHECK: Symbol {
-// CHECK-NEXT: Value:
-// CHECK-NEXT: Size:
-// CHECK-NEXT: Binding: Global
+// CHECK: Symbols [
diff --git a/test/MC/ELF/many-sections-2.s b/test/MC/ELF/many-sections-2.s
index b52c01a..0077552 100644
--- a/test/MC/ELF/many-sections-2.s
+++ b/test/MC/ELF/many-sections-2.s
@@ -32,13 +32,13 @@
// Test that this file has one section too many.
-// SYMBOLS: Name: dm
-// SYMBOLS-NEXT: Value: 0x0
-// SYMBOLS-NEXT: Size: 0
-// SYMBOLS-NEXT: Binding: Local (0x0)
-// SYMBOLS-NEXT: Type: Section (0x3)
-// SYMBOLS-NEXT: Other: 0
-// SYMBOLS-NEXT: Section: dm (0xFF00)
+// SYMBOLS: Name: (0)
+// SYMBOLS: Value: 0x0
+// SYMBOLS: Size: 0
+// SYMBOLS: Binding: Local (0x0)
+// SYMBOLS: Type: Section (0x3)
+// SYMBOLS: Other: 0
+// SYMBOLS: Section: dm (0xFF00)
@@ -130,3 +130,4 @@ gen_sections4 m
b = a + 1
+.long dm
diff --git a/test/MC/ELF/many-sections.s b/test/MC/ELF/many-sections.s
index dbba871..2db6abb 100644
--- a/test/MC/ELF/many-sections.s
+++ b/test/MC/ELF/many-sections.s
@@ -9,13 +9,13 @@
// Check the last referenced section.
-// SYMBOLS: Name: zed
-// SYMBOLS-NEXT: Value: 0x0
-// SYMBOLS-NEXT: Size: 0
-// SYMBOLS-NEXT: Binding: Local (0x0)
-// SYMBOLS-NEXT: Type: Section (0x3)
-// SYMBOLS-NEXT: Other: 0
-// SYMBOLS-NEXT: Section: zed (0xFEFF)
+// SYMBOLS: Name: (0)
+// SYMBOLS: Value: 0x0
+// SYMBOLS: Size: 0
+// SYMBOLS: Binding: Local (0x0)
+// SYMBOLS: Type: Section (0x3)
+// SYMBOLS: Other: 0
+// SYMBOLS: Section: zed (0xFEFF)
@@ -106,3 +106,4 @@ gen_sections8 l
.section foo
.section bar
.section zed
+.long zed
diff --git a/test/MC/ELF/noexec.s b/test/MC/ELF/noexec.s
index 3769427..e8f460e 100644
--- a/test/MC/ELF/noexec.s
+++ b/test/MC/ELF/noexec.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -no-exec-stack -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck %s
+// RUN: llvm-mc -no-exec-stack -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
// CHECK: Section {
// CHECK: Index:
@@ -14,13 +14,3 @@
// CHECK-NEXT: AddressAlignment: 1
// CHECK-NEXT: EntrySize: 0
-// CHECK: Symbol {
-// CHECK: Name: .note.GNU-stack (0)
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .note.GNU-stack
diff --git a/test/MC/ELF/popsection.s b/test/MC/ELF/popsection.s
new file mode 100644
index 0000000..19f5568
--- /dev/null
+++ b/test/MC/ELF/popsection.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -filetype=obj %s -o - -triple x86_64-pc-linux | llvm-readobj -s - | FileCheck %s
+// This used to crash. Test that it creates an empty section instead.
+ .pushsection foo
+ .popsection
+// CHECK: Section {
+// CHECK: Index: 5
+// CHECK-NEXT: Name: foo
+// CHECK-NEXT: Flags [ (0x0)
+// CHECK-NEXT: Address: 0x0
+// CHECK-NEXT: Offset:
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Link: 0
+// CHECK-NEXT: Info: 0
+// CHECK-NEXT: AddressAlignment: 1
+// CHECK-NEXT: EntrySize: 0
diff --git a/test/MC/ELF/reloc-same-name-section.s b/test/MC/ELF/reloc-same-name-section.s
index e63ea54..57463c6 100644
--- a/test/MC/ELF/reloc-same-name-section.s
+++ b/test/MC/ELF/reloc-same-name-section.s
@@ -7,13 +7,13 @@
// CHECK-NEXT: Relocation {
// CHECK-NEXT: Offset:
// CHECK-NEXT: Type:
-// CHECK-NEXT: Symbol: .foo (7)
+// CHECK-NEXT: Symbol: .foo (4)
// CHECK-NEXT: Addend:
// CHECK-NEXT: Relocation {
// CHECK-NEXT: Offset:
// CHECK-NEXT: Type:
-// CHECK-NEXT: Symbol: .foo (8)
+// CHECK-NEXT: Symbol: .foo (5)
// CHECK-NEXT: Addend:
diff --git a/test/MC/ELF/relocation-386.s b/test/MC/ELF/relocation-386.s
index 2af6add..6b7e02f 100644
--- a/test/MC/ELF/relocation-386.s
+++ b/test/MC/ELF/relocation-386.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck %s
// Test that we produce the correct relocation types and that the relocations
// correctly point to the section or the symbol.
@@ -71,27 +71,6 @@
-// Symbol 4 is zed
-// CHECK: Symbol {
-// CHECK: Name: zed
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: zedsec
-// Symbol 7 is section 4
-// CHECK: Symbol {
-// CHECK: Name: .bss (0)
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .bss
leal .Lfoo@GOTOFF(%ebx), %eax
diff --git a/test/MC/ELF/relocation.s b/test/MC/ELF/relocation.s
index 2841612..34f1a40 100644
--- a/test/MC/ELF/relocation.s
+++ b/test/MC/ELF/relocation.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -t | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr | FileCheck %s
// Test that we produce the correct relocation.
@@ -54,6 +54,10 @@ bar:
.quad pr23272_2 - pr23272
.quad pr23272_3 - pr23272
+ .code16
+ call pr23771
// CHECK: Section {
// CHECK: Name: .rela.text
// CHECK: Relocations [
@@ -90,15 +94,6 @@ bar:
// CHECK-NEXT: 0xD8 R_X86_64_GOTPCREL foo 0x0
// CHECK-NEXT: 0xDC R_X86_64_PLT32 foo 0x0
+// CHECK-NEXT: 0xF1 R_X86_64_PC16 pr23771 0xFFFFFFFFFFFFFFFE
-// CHECK: Symbol {
-// CHECK: Name: .text (0)
-// CHECK-NEXT: Value:
-// CHECK-NEXT: Size:
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .text
diff --git a/test/MC/ELF/section-sym.s b/test/MC/ELF/section-sym.s
index f012b2f..4a9484d 100644
--- a/test/MC/ELF/section-sym.s
+++ b/test/MC/ELF/section-sym.s
@@ -40,52 +40,54 @@
// CHECK-NEXT: EntrySize: 0
-// The relocation points to symbol 6
+// The relocation points to symbol 3
// CHECK: Relocations [
// CHECK-NEXT: Section ({{.*}}) .relabar {
// CHECK-NEXT: Relocation {
// CHECK-NEXT: Offset: 0x0
// CHECK-NEXT: Type: R_X86_64_32 (10)
-// CHECK-NEXT: Symbol: foo (6)
+// CHECK-NEXT: Symbol: foo (3)
// CHECK-NEXT: Addend: 0x0
-// The symbol 6 corresponds section 6
+// Symbol 3 is section 6
// CHECK: Symbols [
-// symbol 0
-// CHECK-NOT: Name
-// CHECK: Name:
-// symbol 1
-// CHECK-NOT: Name
-// CHECK: Name: f1
-// symbol 2
-// CHECK-NOT: Name
-// CHECK: Name: f2
-// symbol 3
-// CHECK-NOT: Name
-// CHECK: Name: .text
-// symbol 4
-// CHECK-NOT: Name
-// CHECK: Name: .data
-// symbol 5
-// CHECK-NOT: Name
-// CHECK: Name: .bss
-// symbol 6
-// CHECK-NOT: Name
-// CHECK: Name: foo
-// CHECK: Section: foo (0x6)
-// symbol 7
-// CHECK-NOT: Name
-// CHECK: Name: foo
-// CHECK: Section: foo (0x8)
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: (0)
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local (0x0)
+// CHECK-NEXT: Type: None (0x0)
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: Undefined (0x0)
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: f1 (57)
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local (0x0)
+// CHECK-NEXT: Type: None (0x0)
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .group (0x5)
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: f2 (54)
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local (0x0)
+// CHECK-NEXT: Type: None (0x0)
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .group (0x7)
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: (0)
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local (0x0)
+// CHECK-NEXT: Type: Section (0x3)
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: foo (0x6)
diff --git a/test/MC/ELF/section-sym2.s b/test/MC/ELF/section-sym2.s
index f62e3f9..ca38632 100644
--- a/test/MC/ELF/section-sym2.s
+++ b/test/MC/ELF/section-sym2.s
@@ -19,10 +19,6 @@ mov .rodata, %rsi
// There is only one .rodata symbol
// CHECK:Symbols [
-// CHECK-NOT: Name: .rodata
-// CHECK: Name: .rodata
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local (0x0)
-// CHECK-NEXT: Type: Section (0x3)
-// CHECK-NOT: Name: .rodata
+// CHECK: Type: Section (0x3)
+// CHECK: Section: .rodata
+// CHECK-NOT: Section: .rodata
diff --git a/test/MC/ELF/strtab-suffix-opt.s b/test/MC/ELF/strtab-suffix-opt.s
index 0524656..96d1500 100644
--- a/test/MC/ELF/strtab-suffix-opt.s
+++ b/test/MC/ELF/strtab-suffix-opt.s
@@ -16,6 +16,6 @@ foobar:
.size foobar, .Ltmp3-foobar
-// CHECK: Name: foobar (16)
// CHECK: Name: bar (19)
// CHECK: Name: foo (23)
+// CHECK: Name: foobar (16)
diff --git a/test/MC/ELF/symver.s b/test/MC/ELF/symver.s
index 80d71fd..0f434dd 100644
--- a/test/MC/ELF/symver.s
+++ b/test/MC/ELF/symver.s
@@ -77,7 +77,7 @@ global1:
// CHECK-NEXT: Section: .text
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .text
+// CHECK-NEXT: Name: (0)
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -86,22 +86,22 @@ global1:
// CHECK-NEXT: Section: .text
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .data
+// CHECK-NEXT: Name: bar2@zed
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .data
+// CHECK-NEXT: Section: Undefined
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .bss
+// CHECK-NEXT: Name: bar6@zed
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .bss
+// CHECK-NEXT: Section: Undefined
// CHECK-NEXT: Symbol {
// CHECK-NEXT: Name: g1@@zed
@@ -121,22 +121,4 @@ global1:
// CHECK-NEXT: Other: 0
// CHECK-NEXT: Section: .text
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar2@zed
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Global
-// CHECK-NEXT: Type: None
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: Undefined
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar6@zed
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Global
-// CHECK-NEXT: Type: None
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: Undefined
diff --git a/test/MC/ELF/undef.s b/test/MC/ELF/undef.s
index 245b563..9577ea2 100644
--- a/test/MC/ELF/undef.s
+++ b/test/MC/ELF/undef.s
@@ -42,42 +42,6 @@ test2_b = undef + 1
// CHECK-NEXT: Section: .rodata.str1.1
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .text
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .text
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .data
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .data
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .bss
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .bss
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .rodata.str1.1
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .rodata.str1.1
-// CHECK-NEXT: Symbol {
// CHECK-NEXT: Name: .Lsym1
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
diff --git a/test/MC/ELF/weakref-reloc.s b/test/MC/ELF/weakref-reloc.s
index 484167f..baf8006 100644
--- a/test/MC/ELF/weakref-reloc.s
+++ b/test/MC/ELF/weakref-reloc.s
@@ -1,7 +1,6 @@
// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
-// Test that the relocations point to the correct symbols. We used to get the
-// symbol index wrong for weakrefs when creating _GLOBAL_OFFSET_TABLE_.
+// Test that the relocations point to the correct symbols.
.weakref bar,foo
call zed@PLT
@@ -13,32 +12,3 @@
-// CHECK: Symbols [
-// CHECK: Symbol {
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Global
-// CHECK-NEXT: Type: None
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: Undefined (0x0)
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Weak
-// CHECK-NEXT: Type: None
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: Undefined (0x0)
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: zed
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Global
-// CHECK-NEXT: Type: None
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: Undefined (0x0)
diff --git a/test/MC/ELF/weakref.s b/test/MC/ELF/weakref.s
index d263af3..d342c61 100644
--- a/test/MC/ELF/weakref.s
+++ b/test/MC/ELF/weakref.s
@@ -116,7 +116,7 @@ bar15:
// CHECK-NEXT: Section: .text
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .text
+// CHECK-NEXT: Name: (0)
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -125,24 +125,6 @@ bar15:
// CHECK-NEXT: Section: .text
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .data
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .data
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .bss
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .bss
-// CHECK-NEXT: Symbol {
// CHECK-NEXT: Name: bar10
// CHECK-NEXT: Value: 0x28
// CHECK-NEXT: Size: 0
diff --git a/test/MC/Hexagon/inst_add.ll b/test/MC/Hexagon/inst_add.ll
index 20a7b31..8267769 100644
--- a/test/MC/Hexagon/inst_add.ll
+++ b/test/MC/Hexagon/inst_add.ll
@@ -1,5 +1,5 @@
-;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
-;; RUN: | llvm-objdump -s - | FileCheck %s
+; RUN: llc -march=hexagon -filetype=obj %s -o - \
+; RUN: | llvm-objdump -d - | FileCheck %s
define i32 @foo (i32 %a, i32 %b)
@@ -7,4 +7,4 @@ define i32 @foo (i32 %a, i32 %b)
ret i32 %1
-; CHECK: 0000 004100f3 00c09f52
+; CHECK: c0 3f 10 58 58103fc0
diff --git a/test/MC/Hexagon/inst_cmp_eq.ll b/test/MC/Hexagon/inst_cmp_eq.ll
index 113db63..9820236 100644
--- a/test/MC/Hexagon/inst_cmp_eq.ll
+++ b/test/MC/Hexagon/inst_cmp_eq.ll
@@ -1,5 +1,5 @@
;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
-;; RUN: | llvm-objdump -s - | FileCheck %s
+;; RUN: | llvm-objdump -d - | FileCheck %s
define i1 @foo (i32 %a, i32 %b)
@@ -7,4 +7,6 @@ define i1 @foo (i32 %a, i32 %b)
ret i1 %1
-; CHECK: 0000 004100f2 00404089 00c09f52
+; CHECK: p0 = cmp.eq(r0, r1)
+; CHECK: r0 = p0
+; CHECK: jumpr r31
diff --git a/test/MC/Hexagon/inst_cmp_eqi.ll b/test/MC/Hexagon/inst_cmp_eqi.ll
index 70c4c30..612dfdc 100644
--- a/test/MC/Hexagon/inst_cmp_eqi.ll
+++ b/test/MC/Hexagon/inst_cmp_eqi.ll
@@ -1,5 +1,5 @@
;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
-;; RUN: | llvm-objdump -s - | FileCheck %s
+;; RUN: | llvm-objdump -d - | FileCheck %s
define i1 @foo (i32 %a)
@@ -7,4 +7,6 @@ define i1 @foo (i32 %a)
ret i1 %1
-; CHECK: 0000 40450075 00404089 00c09f52
+; CHECK: p0 = cmp.eq(r0, #42)
+; CHECK: r0 = p0
+; CHECK: jumpr r31
diff --git a/test/MC/Hexagon/inst_cmp_gt.ll b/test/MC/Hexagon/inst_cmp_gt.ll
index 85fedbf..3ce1c0a 100644
--- a/test/MC/Hexagon/inst_cmp_gt.ll
+++ b/test/MC/Hexagon/inst_cmp_gt.ll
@@ -1,5 +1,5 @@
;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
-;; RUN: | llvm-objdump -s - | FileCheck %s
+;; RUN: | llvm-objdump -d - | FileCheck %s
define i1 @foo (i32 %a, i32 %b)
@@ -7,4 +7,6 @@ define i1 @foo (i32 %a, i32 %b)
ret i1 %1
-; CHECK: 0000 004140f2 00404089 00c09f52
+; CHECK: p0 = cmp.gt(r0, r1)
+; CHECK: r0 = p0
+; CHECK: jumpr r31 } \ No newline at end of file
diff --git a/test/MC/Hexagon/inst_cmp_gti.ll b/test/MC/Hexagon/inst_cmp_gti.ll
index 18ba3e4..f3c13a2 100644
--- a/test/MC/Hexagon/inst_cmp_gti.ll
+++ b/test/MC/Hexagon/inst_cmp_gti.ll
@@ -1,5 +1,5 @@
;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
-;; RUN: | llvm-objdump -s - | FileCheck %s
+;; RUN: | llvm-objdump -d - | FileCheck %s
define i1 @foo (i32 %a)
@@ -7,4 +7,6 @@ define i1 @foo (i32 %a)
ret i1 %1
-; CHECK: 0000 40454075 00404089 00c09f52
+; CHECK: p0 = cmp.gt(r0, #42)
+; CHECK: r0 = p0
+; CHECK: jumpr r31
diff --git a/test/MC/Hexagon/inst_cmp_lt.ll b/test/MC/Hexagon/inst_cmp_lt.ll
index 3a76184..80ba16f 100644
--- a/test/MC/Hexagon/inst_cmp_lt.ll
+++ b/test/MC/Hexagon/inst_cmp_lt.ll
@@ -1,5 +1,5 @@
;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
-;; RUN: | llvm-objdump -s - | FileCheck %s
+;; RUN: | llvm-objdump -d - | FileCheck %s
define i1 @foo (i32 %a, i32 %b)
@@ -7,4 +7,6 @@ define i1 @foo (i32 %a, i32 %b)
ret i1 %1
-; CHECK: 0000 004041f2 00404089 00c09f52
+; CHECK: p0 = cmp.gt(r1, r0)
+; CHECK: r0 = p0
+; CHECK: jumpr r31
diff --git a/test/MC/Hexagon/inst_cmp_ugt.ll b/test/MC/Hexagon/inst_cmp_ugt.ll
index 096536f..07fa784 100644
--- a/test/MC/Hexagon/inst_cmp_ugt.ll
+++ b/test/MC/Hexagon/inst_cmp_ugt.ll
@@ -1,5 +1,5 @@
;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
-;; RUN: | llvm-objdump -s - | FileCheck %s
+;; RUN: | llvm-objdump -d - | FileCheck %s
define i1 @foo (i32 %a, i32 %b)
@@ -7,4 +7,6 @@ define i1 @foo (i32 %a, i32 %b)
ret i1 %1
-; CHECK: 0000 004160f2 00404089 00c09f52
+; CHECK: p0 = cmp.gtu(r0, r1)
+; CHECK: r0 = p0
+; CHECK: jumpr r31
diff --git a/test/MC/Hexagon/inst_cmp_ugti.ll b/test/MC/Hexagon/inst_cmp_ugti.ll
index a835834..59db552 100644
--- a/test/MC/Hexagon/inst_cmp_ugti.ll
+++ b/test/MC/Hexagon/inst_cmp_ugti.ll
@@ -1,5 +1,5 @@
;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
-;; RUN: | llvm-objdump -s - | FileCheck %s
+;; RUN: | llvm-objdump -d - | FileCheck %s
define i1 @foo (i32 %a)
@@ -7,4 +7,6 @@ define i1 @foo (i32 %a)
ret i1 %1
-; CHECK: 0000 40458075 00404089 00c09f52
+; CHECK: p0 = cmp.gtu(r0, #42)
+; CHECK: r0 = p0
+; CHECK: jumpr r31
diff --git a/test/MC/Hexagon/inst_cmp_ult.ll b/test/MC/Hexagon/inst_cmp_ult.ll
index 4323fa0..c880ac8 100644
--- a/test/MC/Hexagon/inst_cmp_ult.ll
+++ b/test/MC/Hexagon/inst_cmp_ult.ll
@@ -1,5 +1,5 @@
;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
-;; RUN: | llvm-objdump -s - | FileCheck %s
+;; RUN: | llvm-objdump -d - | FileCheck %s
define i1 @foo (i32 %a, i32 %b)
@@ -7,4 +7,6 @@ define i1 @foo (i32 %a, i32 %b)
ret i1 %1
-; CHECK: 0000 004061f2 00404089 00c09f52
+; CHECK: p0 = cmp.gtu(r1, r0)
+; CHECK: r0 = p0
+; CHECK: jumpr r31 \ No newline at end of file
diff --git a/test/MC/Hexagon/inst_select.ll b/test/MC/Hexagon/inst_select.ll
index 29a2db0..9d12c1d 100644
--- a/test/MC/Hexagon/inst_select.ll
+++ b/test/MC/Hexagon/inst_select.ll
@@ -1,5 +1,5 @@
-;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
-;; RUN: | llvm-objdump -s - | FileCheck %s
+; RUN: llc -march=hexagon -filetype=obj %s -o - \
+; RUN: | llvm-objdump -d - | FileCheck %s
define i32 @foo (i1 %a, i32 %b, i32 %c)
@@ -7,4 +7,7 @@ define i32 @foo (i1 %a, i32 %b, i32 %c)
ret i32 %1
-; CHECK: 0000 00400085 00600174 00608274 00c09f52
+; CHECK: 00 40 00 85 85004000
+; CHECK: 00 40 9f 52 529f4000
+; CHECK: 00 60 01 74 74016000
+; CHECK: 00 e0 82 74 7482e000 \ No newline at end of file
diff --git a/test/MC/Hexagon/inst_sxtb.ll b/test/MC/Hexagon/inst_sxtb.ll
index 4a21742..34219c7 100644
--- a/test/MC/Hexagon/inst_sxtb.ll
+++ b/test/MC/Hexagon/inst_sxtb.ll
@@ -1,5 +1,5 @@
-;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
-;; RUN: | llvm-objdump -s - | FileCheck %s
+; RUN: llc -march=hexagon -filetype=obj %s -o - \
+; RUN: | llvm-objdump -d - | FileCheck %s
define i32 @foo (i8 %a)
@@ -7,4 +7,4 @@ define i32 @foo (i8 %a)
ret i32 %1
-; CHECK: 0000 0040a070 00c09f52
+; CHECK: c0 3f 00 55 55003fc0
diff --git a/test/MC/Hexagon/inst_sxth.ll b/test/MC/Hexagon/inst_sxth.ll
index f0bcf58..5d1223d 100644
--- a/test/MC/Hexagon/inst_sxth.ll
+++ b/test/MC/Hexagon/inst_sxth.ll
@@ -1,5 +1,5 @@
-;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
-;; RUN: | llvm-objdump -s - | FileCheck %s
+; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+; RUN: | llvm-objdump -d - | FileCheck %s
define i32 @foo (i16 %a)
@@ -7,4 +7,4 @@ define i32 @foo (i16 %a)
ret i32 %1
-; CHECK: 0000 0040e070 00c09f52
+; CHECK: c0 3f 00 54 54003fc0 \ No newline at end of file
diff --git a/test/MC/Hexagon/inst_zxtb.ll b/test/MC/Hexagon/inst_zxtb.ll
index 622c036..86da5e5 100644
--- a/test/MC/Hexagon/inst_zxtb.ll
+++ b/test/MC/Hexagon/inst_zxtb.ll
@@ -1,5 +1,5 @@
-;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
-;; RUN: | llvm-objdump -s - | FileCheck %s
+; RUN: llc -march=hexagon -filetype=obj %s -o - \
+; RUN: | llvm-objdump -d - | FileCheck %s
define i32 @foo (i8 %a)
@@ -7,4 +7,4 @@ define i32 @foo (i8 %a)
ret i32 %1
-; CHECK: 0000 e05f0076 00c09f52
+; CHECK: c0 3f 00 57 57003fc0
diff --git a/test/MC/Hexagon/inst_zxth.ll b/test/MC/Hexagon/inst_zxth.ll
index 962210b..7b3d1b3 100644
--- a/test/MC/Hexagon/inst_zxth.ll
+++ b/test/MC/Hexagon/inst_zxth.ll
@@ -1,5 +1,5 @@
-;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
-;; RUN: | llvm-objdump -s - | FileCheck %s
+; RUN: llc -march=hexagon -filetype=obj %s -o - \
+; RUN: | llvm-objdump -d - | FileCheck %s
define i32 @foo (i16 %a)
@@ -7,4 +7,4 @@ define i32 @foo (i16 %a)
ret i32 %1
-; CHECK: 0000 0040c070 00c09f52
+; CHECK: c0 3f 00 56 56003fc0
diff --git a/test/MC/MachO/absolutize.s b/test/MC/MachO/absolutize.s
index 19917e3..8947c0f 100644
--- a/test/MC/MachO/absolutize.s
+++ b/test/MC/MachO/absolutize.s
@@ -6,9 +6,9 @@ _text_b:
xorl %eax,%eax
xorl %eax,%eax
xorl %eax,%eax
movl $(_text_a - _text_b), %eax
Ltext_expr_0 = _text_a - _text_b
movl $(Ltext_expr_0), %eax
@@ -30,9 +30,9 @@ _data_b:
.long 0
.long 0
.long 0
.long _data_a - _data_b
Ldata_expr_0 = _data_a - _data_b
.long Ldata_expr_0
@@ -75,7 +75,7 @@ Ldata_expr_2 = Ldata_d - Ldata_c
// CHECK: ('offset', 324)
// CHECK: ('alignment', 0)
// CHECK: ('reloc_offset', 412)
-// CHECK: ('num_reloc', 7)
+// CHECK: ('num_reloc', 3)
// CHECK: ('flags', 0x80000400)
// CHECK: ('reserved1', 0)
// CHECK: ('reserved2', 0)
@@ -85,21 +85,9 @@ Ldata_expr_2 = Ldata_d - Ldata_c
// CHECK: (('word-0', 0xa0000027),
// CHECK: ('word-1', 0x0)),
// CHECK: # Relocation 1
-// CHECK: (('word-0', 0xa400001d),
-// CHECK: ('word-1', 0x6)),
-// CHECK: # Relocation 2
-// CHECK: (('word-0', 0xa1000000),
-// CHECK: ('word-1', 0x4)),
-// CHECK: # Relocation 3
-// CHECK: (('word-0', 0xa4000013),
-// CHECK: ('word-1', 0x4)),
-// CHECK: # Relocation 4
-// CHECK: (('word-0', 0xa1000000),
-// CHECK: ('word-1', 0x2)),
-// CHECK: # Relocation 5
// CHECK: (('word-0', 0xa4000009),
// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 6
+// CHECK: # Relocation 2
// CHECK: (('word-0', 0xa1000000),
// CHECK: ('word-1', 0x2)),
// CHECK: ])
@@ -111,8 +99,8 @@ Ldata_expr_2 = Ldata_d - Ldata_c
// CHECK: ('size', 44)
// CHECK: ('offset', 367)
// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 468)
-// CHECK: ('num_reloc', 7)
+// CHECK: ('reloc_offset', 436)
+// CHECK: ('num_reloc', 3)
// CHECK: ('flags', 0x0)
// CHECK: ('reserved1', 0)
// CHECK: ('reserved2', 0)
@@ -122,21 +110,9 @@ Ldata_expr_2 = Ldata_d - Ldata_c
// CHECK: (('word-0', 0xa0000028),
// CHECK: ('word-1', 0x2b)),
// CHECK: # Relocation 1
-// CHECK: (('word-0', 0xa4000020),
-// CHECK: ('word-1', 0x37)),
-// CHECK: # Relocation 2
-// CHECK: (('word-0', 0xa1000000),
-// CHECK: ('word-1', 0x33)),
-// CHECK: # Relocation 3
-// CHECK: (('word-0', 0xa4000018),
-// CHECK: ('word-1', 0x33)),
-// CHECK: # Relocation 4
-// CHECK: (('word-0', 0xa1000000),
-// CHECK: ('word-1', 0x2f)),
-// CHECK: # Relocation 5
// CHECK: (('word-0', 0xa4000010),
// CHECK: ('word-1', 0x2b)),
-// CHECK: # Relocation 6
+// CHECK: # Relocation 2
// CHECK: (('word-0', 0xa1000000),
// CHECK: ('word-1', 0x2f)),
// CHECK: ])
@@ -146,9 +122,9 @@ Ldata_expr_2 = Ldata_d - Ldata_c
// CHECK: # Load Command 1
// CHECK: (('command', 2)
// CHECK: ('size', 24)
-// CHECK: ('symoff', 524)
+// CHECK: ('symoff', 460)
// CHECK: ('nsyms', 4)
-// CHECK: ('stroff', 572)
+// CHECK: ('stroff', 508)
// CHECK: ('strsize', 36)
// CHECK: ('_string_data', '\x00_text_b\x00_data_b\x00_text_a\x00_data_a\x00\x00\x00\x00')
// CHECK: ('_symbols', [
diff --git a/test/MC/MachO/reloc-diff.s b/test/MC/MachO/reloc-diff.s
index 601edba..a63a413 100644
--- a/test/MC/MachO/reloc-diff.s
+++ b/test/MC/MachO/reloc-diff.s
@@ -1,39 +1,27 @@
// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
// CHECK: # Relocation 0
-// CHECK: (('word-0', 0xa2000014),
+// CHECK: (('word-0', 0xa4000010),
// CHECK: ('word-1', 0x0)),
// CHECK: # Relocation 1
// CHECK: (('word-0', 0xa1000000),
// CHECK: ('word-1', 0x0)),
// CHECK: # Relocation 2
-// CHECK: (('word-0', 0xa4000010),
+// CHECK: (('word-0', 0xa4000008),
// CHECK: ('word-1', 0x0)),
// CHECK: # Relocation 3
// CHECK: (('word-0', 0xa1000000),
// CHECK: ('word-1', 0x0)),
// CHECK: # Relocation 4
-// CHECK: (('word-0', 0xa400000c),
+// CHECK: (('word-0', 0xa4000004),
// CHECK: ('word-1', 0x0)),
// CHECK: # Relocation 5
// CHECK: (('word-0', 0xa1000000),
// CHECK: ('word-1', 0x0)),
// CHECK: # Relocation 6
-// CHECK: (('word-0', 0xa4000008),
-// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 7
-// CHECK: (('word-0', 0xa1000000),
-// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 8
-// CHECK: (('word-0', 0xa4000004),
-// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 9
-// CHECK: (('word-0', 0xa1000000),
-// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 10
// CHECK: (('word-0', 0xa2000000),
// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 11
+// CHECK: # Relocation 7
// CHECK: (('word-0', 0xa1000000),
// CHECK: ('word-1', 0x0)),
@@ -43,7 +31,7 @@ _local_def:
.long _external_def - _local_def
.long Ltemp - _local_def
diff --git a/test/MC/Mips/mips-expansions.s b/test/MC/Mips/mips-expansions.s
index b6dc32e..d3fdf39 100644
--- a/test/MC/Mips/mips-expansions.s
+++ b/test/MC/Mips/mips-expansions.s
@@ -1,86 +1,85 @@
-# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
-# Check that the assembler can handle the documented syntax
-# for macro instructions
-# Load immediate instructions
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | \
+# RUN: FileCheck %s
+# Check that the IAS expands macro instructions in the same way as GAS.
+# Load immediate, done by MipsAsmParser::expandLoadImm():
+ li $5, 123
# CHECK: ori $5, $zero, 123 # encoding: [0x7b,0x00,0x05,0x34]
+ li $6, -2345
# CHECK: addiu $6, $zero, -2345 # encoding: [0xd7,0xf6,0x06,0x24]
+ li $7, 65538
# CHECK: lui $7, 1 # encoding: [0x01,0x00,0x07,0x3c]
# CHECK: ori $7, $7, 2 # encoding: [0x02,0x00,0xe7,0x34]
+ li $8, ~7
# CHECK: addiu $8, $zero, -8 # encoding: [0xf8,0xff,0x08,0x24]
+ li $9, 0x10000
# CHECK: lui $9, 1 # encoding: [0x01,0x00,0x09,0x3c]
# CHECK-NOT: ori $9, $9, 0 # encoding: [0x00,0x00,0x29,0x35]
+ li $10, ~(0x101010)
# CHECK: lui $10, 65519 # encoding: [0xef,0xff,0x0a,0x3c]
# CHECK: ori $10, $10, 61423 # encoding: [0xef,0xef,0x4a,0x35]
+# Load address, done by MipsAsmParser::expandLoadAddressReg()
+# and MipsAsmParser::expandLoadAddressImm():
+ la $4, 20
# CHECK: ori $4, $zero, 20 # encoding: [0x14,0x00,0x04,0x34]
+ la $7, 65538
# CHECK: lui $7, 1 # encoding: [0x01,0x00,0x07,0x3c]
# CHECK: ori $7, $7, 2 # encoding: [0x02,0x00,0xe7,0x34]
+ la $4, 20($5)
# CHECK: ori $4, $5, 20 # encoding: [0x14,0x00,0xa4,0x34]
+ la $7, 65538($8)
# CHECK: lui $7, 1 # encoding: [0x01,0x00,0x07,0x3c]
# CHECK: ori $7, $7, 2 # encoding: [0x02,0x00,0xe7,0x34]
# CHECK: addu $7, $7, $8 # encoding: [0x21,0x38,0xe8,0x00]
+ la $8, symbol
# CHECK: lui $8, %hi(symbol) # encoding: [A,A,0x08,0x3c]
# CHECK: # fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
# CHECK: ori $8, $8, %lo(symbol) # encoding: [A,A,0x08,0x35]
# CHECK: # fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# LW/SW and LDC1/SDC1 of symbol address, done by MipsAsmParser::expandMemInst():
+ .set noat
+ lw $10, symbol($4)
# CHECK: lui $10, %hi(symbol) # encoding: [A,A,0x0a,0x3c]
# CHECK: # fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
# CHECK: addu $10, $10, $4 # encoding: [0x21,0x50,0x44,0x01]
# CHECK: lw $10, %lo(symbol)($10) # encoding: [A,A,0x4a,0x8d]
# CHECK: # fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+ .set at
+ sw $10, symbol($9)
# CHECK: lui $1, %hi(symbol) # encoding: [A,A,0x01,0x3c]
# CHECK: # fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
# CHECK: addu $1, $1, $9 # encoding: [0x21,0x08,0x29,0x00]
# CHECK: sw $10, %lo(symbol)($1) # encoding: [A,A,0x2a,0xac]
# CHECK: # fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+ lw $10, 655483($4)
# CHECK: lui $10, 10 # encoding: [0x0a,0x00,0x0a,0x3c]
# CHECK: addu $10, $10, $4 # encoding: [0x21,0x50,0x44,0x01]
# CHECK: lw $10, 123($10) # encoding: [0x7b,0x00,0x4a,0x8d]
+ sw $10, 123456($9)
# CHECK: lui $1, 2 # encoding: [0x02,0x00,0x01,0x3c]
# CHECK: addu $1, $1, $9 # encoding: [0x21,0x08,0x29,0x00]
# CHECK: sw $10, 57920($1) # encoding: [0x40,0xe2,0x2a,0xac]
+ lw $8, symbol
# CHECK: lui $8, %hi(symbol) # encoding: [A,A,0x08,0x3c]
# CHECK: # fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
# CHECK-NOT: move $8, $8 # encoding: [0x21,0x40,0x00,0x01]
# CHECK: lw $8, %lo(symbol)($8) # encoding: [A,A,0x08,0x8d]
# CHECK: # fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+ sw $8, symbol
# CHECK: lui $1, %hi(symbol) # encoding: [A,A,0x01,0x3c]
# CHECK: # fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
# CHECK-NOT: move $1, $1 # encoding: [0x21,0x08,0x20,0x00]
# CHECK: sw $8, %lo(symbol)($1) # encoding: [A,A,0x28,0xac]
# CHECK: # fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+ ldc1 $f0, symbol
# CHECK: lui $1, %hi(symbol)
# CHECK: ldc1 $f0, %lo(symbol)($1)
+ sdc1 $f0, symbol
# CHECK: lui $1, %hi(symbol)
# CHECK: sdc1 $f0, %lo(symbol)($1)
- li $5,123
- li $6,-2345
- li $7,65538
- li $8, ~7
- li $9, 0x10000
- li $10, ~(0x101010)
- la $a0, 20
- la $7,65538
- la $a0, 20($a1)
- la $7,65538($8)
- la $t0, symbol
- .set noat
- lw $t2, symbol($a0)
- .set at
- sw $t2, symbol($t1)
- lw $t2, 655483($a0)
- sw $t2, 123456($t1)
- lw $8, symbol
- sw $8, symbol
- ldc1 $f0, symbol
- sdc1 $f0, symbol
diff --git a/test/MC/Mips/mips-pdr.s b/test/MC/Mips/mips-pdr.s
index 372c259..79e824b 100644
--- a/test/MC/Mips/mips-pdr.s
+++ b/test/MC/Mips/mips-pdr.s
@@ -2,7 +2,7 @@
# RUN: FileCheck %s -check-prefix=ASMOUT
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -filetype=obj -o - | \
-# RUN: llvm-readobj -s -section-data | \
+# RUN: llvm-readobj -s -section-data -r | \
# RUN: FileCheck %s -check-prefix=OBJOUT
# ASMOUT: .text
@@ -32,16 +32,12 @@
# We should also check if relocation information was correctly generated.
-# OBJOUT: Section {
-# OBJOUT: Name: .rel.pdr
-# OBJOUT: Type: SHT_REL (0x9)
-# OBJOUT: Flags [ (0x0)
-# OBJOUT: ]
-# OBJOUT: Size: 16
-# OBJOUT: SectionData (
-# OBJOUT: 0000: 00000000 00000202 00000020 00000802
-# OBJOUT: )
-# OBJOUT: }
+# OBJOUT: Relocations [
+# OBJOUT-NEXT: Section (6) .rel.pdr {
+# OBJOUT-NEXT: 0x0 R_MIPS_32 .text 0x0
+# OBJOUT-NEXT: 0x20 R_MIPS_32 _global_foo 0x0
.type _local_foo,@function
diff --git a/test/MC/Mips/mips-relocations.s b/test/MC/Mips/mips-relocations.s
index 6f095d1..13cea2f 100644
--- a/test/MC/Mips/mips-relocations.s
+++ b/test/MC/Mips/mips-relocations.s
@@ -11,8 +11,8 @@
# CHECK: # fixup A - offset: 0, value: loop_1@GOT, kind: fixup_Mips_GOT_Local
# CHECK: lui $2, %dtprel_hi(_gp_disp) # encoding: [A,A,0x02,0x3c]
# CHECK: # fixup A - offset: 0, value: _gp_disp@DTPREL_HI, kind: fixup_Mips_DTPREL_HI
-# CHECK: addiu $2, $2, %dtprel_hi(_gp_disp) # encoding: [A,A,0x42,0x24]
-# CHECK: # fixup A - offset: 0, value: _gp_disp@DTPREL_HI, kind: fixup_Mips_DTPREL_HI
+# CHECK: addiu $2, $2, %dtprel_lo(_gp_disp) # encoding: [A,A,0x42,0x24]
+# CHECK: # fixup A - offset: 0, value: _gp_disp@DTPREL_LO, kind: fixup_Mips_DTPREL_LO
# CHECK: lw $3, %got(loop_1)($2) # encoding: [A,A,0x43,0x8c]
# CHECK: # fixup A - offset: 0, value: loop_1@GOT, kind: fixup_Mips_GOT_Local
# CHECK: lw $4, %got_disp(loop_2)($3) # encoding: [A,A,0x64,0x8c]
@@ -26,15 +26,15 @@
# CHECK: addiu $2, $2, %tprel_lo(_gp_disp) # encoding: [A,A,0x42,0x24]
# CHECK: # fixup A - offset: 0, value: _gp_disp@TPREL_LO, kind: fixup_Mips_TPREL_LO
- lui $2, %hi(_gp_disp)
- addiu $2, $2, %lo(_gp_disp)
- lw $25, %call16(strchr)($gp)
+ lui $2, %hi(_gp_disp)
+ addiu $2, $2, %lo(_gp_disp)
+ lw $25, %call16(strchr)($gp)
lw $3, %got(loop_1)($2)
- lui $2, %dtprel_hi(_gp_disp)
- addiu $2, $2, %dtprel_hi(_gp_disp)
- lw $3, %got(loop_1)($2)
- lw $4, %got_disp(loop_2)($3)
- lw $5, %got_page(loop_3)($4)
- lw $6, %got_ofst(loop_4)($5)
- lui $2, %tprel_hi(_gp_disp)
- addiu $2, $2, %tprel_lo(_gp_disp)
+ lui $2, %dtprel_hi(_gp_disp)
+ addiu $2, $2, %dtprel_lo(_gp_disp)
+ lw $3, %got(loop_1)($2)
+ lw $4, %got_disp(loop_2)($3)
+ lw $5, %got_page(loop_3)($4)
+ lw $6, %got_ofst(loop_4)($5)
+ lui $2, %tprel_hi(_gp_disp)
+ addiu $2, $2, %tprel_lo(_gp_disp)
diff --git a/test/MC/Mips/octeon-instructions.s b/test/MC/Mips/octeon-instructions.s
index 34830c0..cbb9908 100644
--- a/test/MC/Mips/octeon-instructions.s
+++ b/test/MC/Mips/octeon-instructions.s
@@ -15,6 +15,8 @@
# CHECK: cins32 $22, $22, 9, 22 # encoding: [0x72,0xd6,0xb2,0x73]
# CHECK: cins32 $24, $ra, 0, 31 # encoding: [0x73,0xf8,0xf8,0x33]
# CHECK: cins32 $15, $15, 5, 5 # encoding: [0x71,0xef,0x29,0x73]
+# CHECK: dmtc2 $2, 16455 # encoding: [0x48,0xa2,0x40,0x47]
+# CHECK: dmfc2 $2, 64 # encoding: [0x48,0x22,0x00,0x40]
# CHECK: dmul $9, $6, $7 # encoding: [0x70,0xc7,0x48,0x03]
# CHECK: dmul $19, $24, $25 # encoding: [0x73,0x19,0x98,0x03]
# CHECK: dmul $9, $9, $6 # encoding: [0x71,0x26,0x48,0x03]
@@ -72,6 +74,8 @@ foo:
cins32 $22, 9, 22
cins $24, $31, 32, 31
cins $15, 37, 5
+ dmtc2 $2, 0x4047
+ dmfc2 $2, 0x0040
dmul $9, $6, $7
dmul $19, $24, $25
dmul $9, $6
diff --git a/test/MC/Mips/relocation.s b/test/MC/Mips/relocation.s
new file mode 100644
index 0000000..642b409
--- /dev/null
+++ b/test/MC/Mips/relocation.s
@@ -0,0 +1,10 @@
+// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux < %s | llvm-readobj -r | FileCheck %s
+// Test that we produce the correct relocation.
+// FIXME: move more relocation-only tests here.
+ .long foo
+// CHECK: R_MIPS_32 foo
+ .long foo-.
+// CHECK: R_MIPS_PC32 foo
diff --git a/test/MC/Mips/set-push-pop-directives-bad.s b/test/MC/Mips/set-push-pop-directives-bad.s
index 53d8b23..8994eea 100644
--- a/test/MC/Mips/set-push-pop-directives-bad.s
+++ b/test/MC/Mips/set-push-pop-directives-bad.s
@@ -12,3 +12,12 @@
# CHECK: :[[@LINE-1]]:19: error: unexpected token, expected end of statement
.set pop bar
# CHECK: :[[@LINE-1]]:18: error: unexpected token, expected end of statement
+ .set hardfloat
+ .set push
+ .set softfloat
+ add.s $f2, $f2, $f2
+# CHECK: :[[@LINE-1]]:9: error: instruction requires a CPU feature not currently enabled
+ .set pop
+ add.s $f2, $f2, $f2
+# CHECK-NOT: :[[@LINE-1]]:9: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/set-push-pop-directives.s b/test/MC/Mips/set-push-pop-directives.s
index 5f55b7c..3a0b2ae 100644
--- a/test/MC/Mips/set-push-pop-directives.s
+++ b/test/MC/Mips/set-push-pop-directives.s
@@ -51,3 +51,20 @@
# CHECK: b 1336
# CHECK: nop
# CHECK: addvi.b $w15, $w13, 18
+ .set push
+ .set dsp
+ lbux $7, $10($11)
+ .set pop
+ .set push
+ .set dsp
+ lbux $7, $10($11)
+# CHECK-NOT: :[[@LINE-1]]:5: error: instruction requires a CPU feature not currently enabled
+ .set pop
+ .set push
+ .set dsp
+ lbux $7, $10($11)
+# CHECK-NOT: :[[@LINE-1]]:5: error: instruction requires a CPU feature not currently enabled
+ .set pop
diff --git a/test/MC/Mips/set-softfloat-hardfloat-bad.s b/test/MC/Mips/set-softfloat-hardfloat-bad.s
new file mode 100644
index 0000000..14b1e78
--- /dev/null
+++ b/test/MC/Mips/set-softfloat-hardfloat-bad.s
@@ -0,0 +1,14 @@
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -mcpu=mips32 -mattr=+soft-float 2>%t1
+# RUN: FileCheck %s < %t1
+ .set hardfloat
+ add.s $f2, $f2, $f2
+ # CHECK-NOT: :[[@LINE-1]]:3: error: instruction requires a CPU feature not currently enabled
+ sub.s $f2, $f2, $f2
+ # CHECK-NOT: :[[@LINE-1]]:3: error: instruction requires a CPU feature not currently enabled
+ .set softfloat
+ add.s $f2, $f2, $f2
+ # CHECK: :[[@LINE-1]]:3: error: instruction requires a CPU feature not currently enabled
+ sub.s $f2, $f2, $f2
+ # CHECK: :[[@LINE-1]]:3: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/set-softfloat-hardfloat.s b/test/MC/Mips/set-softfloat-hardfloat.s
new file mode 100644
index 0000000..ffb10f3
--- /dev/null
+++ b/test/MC/Mips/set-softfloat-hardfloat.s
@@ -0,0 +1,12 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -mcpu=mips32r2 -mattr=+soft-float | \
+# RUN: FileCheck %s
+ .set hardfloat
+ add.s $f2, $f2, $f2
+ sub.s $f2, $f2, $f2
+ .set softfloat
+# CHECK: .set hardfloat
+# CHECK: add.s $f2, $f2, $f2
+# CHECK: sub.s $f2, $f2, $f2
+# CHECK: .set softfloat
diff --git a/test/MC/PowerPC/st-other-crash.s b/test/MC/PowerPC/st-other-crash.s
new file mode 100644
index 0000000..fcc56ad
--- /dev/null
+++ b/test/MC/PowerPC/st-other-crash.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc < %s -filetype=obj -triple powerpc64le-pc-linux | \
+// RUN: llvm-readobj -t | FileCheck %s
+// This used to crash. Make sure it produces the correct symbol.
+// CHECK: Symbol {
+// CHECK: Name: _ZN4llvm11SmallVectorIcLj0EEC2Ev (12)
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local (0x0)
+// CHECK-NEXT: Type: None (0x0)
+// CHECK-NEXT: Other: 64
+// CHECK-NEXT: Section: .group (0x5)
+ .section .text._ZN4llvm11SmallVectorIcLj0EEC2Ev,"axG",@progbits,_ZN4llvm11SmallVectorIcLj0EEC2Ev,comdat
+ addis 2, 12, .TOC.-.Ltmp2@ha
+ .localentry _ZN4llvm11SmallVectorIcLj0EEC2Ev, .Ltmp3-.Ltmp2
diff --git a/test/MC/PowerPC/vsx.s b/test/MC/PowerPC/vsx.s
index 773fc9e..352fc51 100644
--- a/test/MC/PowerPC/vsx.s
+++ b/test/MC/PowerPC/vsx.s
@@ -95,6 +95,12 @@
# CHECK-BE: xsmaddmdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x4c]
# CHECK-LE: xsmaddmdp 7, 63, 27 # encoding: [0x4c,0xd9,0xff,0xf0]
xsmaddmdp 7, 63, 27
+# CHECK-BE: xsmaddasp 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0x0c]
+# CHECK-LE: xsmaddasp 7, 63, 27 # encoding: [0x0c,0xd8,0xff,0xf0]
+ xsmaddasp 7, 63, 27
+# CHECK-BE: xsmaddmsp 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0x4c]
+# CHECK-LE: xsmaddmsp 7, 63, 27 # encoding: [0x4c,0xd8,0xff,0xf0]
+ xsmaddmsp 7, 63, 27
# CHECK-BE: xsmaxdp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x04]
# CHECK-LE: xsmaxdp 7, 63, 27 # encoding: [0x04,0xdd,0xff,0xf0]
xsmaxdp 7, 63, 27
@@ -107,6 +113,12 @@
# CHECK-BE: xsmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0xcc]
# CHECK-LE: xsmsubmdp 7, 63, 27 # encoding: [0xcc,0xd9,0xff,0xf0]
xsmsubmdp 7, 63, 27
+# CHECK-BE: xsmsubasp 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0x8c]
+# CHECK-LE: xsmsubasp 7, 63, 27 # encoding: [0x8c,0xd8,0xff,0xf0]
+ xsmsubasp 7, 63, 27
+# CHECK-BE: xsmsubmsp 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0xcc]
+# CHECK-LE: xsmsubmsp 7, 63, 27 # encoding: [0xcc,0xd8,0xff,0xf0]
+ xsmsubmsp 7, 63, 27
# CHECK-BE: xsmulsp 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0x84]
# CHECK-LE: xsmulsp 7, 63, 27 # encoding: [0x84,0xd8,0xff,0xf0]
xsmulsp 7, 63, 27
@@ -131,6 +143,18 @@
# CHECK-BE: xsnmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0xcc]
# CHECK-LE: xsnmsubmdp 7, 63, 27 # encoding: [0xcc,0xdd,0xff,0xf0]
xsnmsubmdp 7, 63, 27
+# CHECK-BE: xsnmaddasp 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x0c]
+# CHECK-LE: xsnmaddasp 7, 63, 27 # encoding: [0x0c,0xdc,0xff,0xf0]
+ xsnmaddasp 7, 63, 27
+# CHECK-BE: xsnmaddmsp 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x4c]
+# CHECK-LE: xsnmaddmsp 7, 63, 27 # encoding: [0x4c,0xdc,0xff,0xf0]
+ xsnmaddmsp 7, 63, 27
+# CHECK-BE: xsnmsubasp 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x8c]
+# CHECK-LE: xsnmsubasp 7, 63, 27 # encoding: [0x8c,0xdc,0xff,0xf0]
+ xsnmsubasp 7, 63, 27
+# CHECK-BE: xsnmsubmsp 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0xcc]
+# CHECK-LE: xsnmsubmsp 7, 63, 27 # encoding: [0xcc,0xdc,0xff,0xf0]
+ xsnmsubmsp 7, 63, 27
# CHECK-BE: xsrdpi 7, 27 # encoding: [0xf0,0xe0,0xd9,0x24]
# CHECK-LE: xsrdpi 7, 27 # encoding: [0x24,0xd9,0xe0,0xf0]
xsrdpi 7, 27
diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s
index cb71a00..ca0fccb 100644
--- a/test/MC/X86/avx512-encodings.s
+++ b/test/MC/X86/avx512-encodings.s
@@ -4796,6 +4796,38 @@
// CHECK: encoding: [0x62,0x61,0x7c,0x58,0x51,0xa2,0xfc,0xfd,0xff,0xff]
vsqrtps -516(%rdx){1to16}, %zmm28
+// CHECK: vsqrtpd {rn-sae}, %zmm19, %zmm19
+// CHECK: encoding: [0x62,0xa1,0xfd,0x18,0x51,0xdb]
+ vsqrtpd {rn-sae}, %zmm19, %zmm19
+// CHECK: vsqrtpd {ru-sae}, %zmm19, %zmm19
+// CHECK: encoding: [0x62,0xa1,0xfd,0x58,0x51,0xdb]
+ vsqrtpd {ru-sae}, %zmm19, %zmm19
+// CHECK: vsqrtpd {rd-sae}, %zmm19, %zmm19
+// CHECK: encoding: [0x62,0xa1,0xfd,0x38,0x51,0xdb]
+ vsqrtpd {rd-sae}, %zmm19, %zmm19
+// CHECK: vsqrtpd {rz-sae}, %zmm19, %zmm19
+// CHECK: encoding: [0x62,0xa1,0xfd,0x78,0x51,0xdb]
+ vsqrtpd {rz-sae}, %zmm19, %zmm19
+// CHECK: vsqrtps {rn-sae}, %zmm29, %zmm28
+// CHECK: encoding: [0x62,0x01,0x7c,0x18,0x51,0xe5]
+ vsqrtps {rn-sae}, %zmm29, %zmm28
+// CHECK: vsqrtps {ru-sae}, %zmm29, %zmm28
+// CHECK: encoding: [0x62,0x01,0x7c,0x58,0x51,0xe5]
+ vsqrtps {ru-sae}, %zmm29, %zmm28
+// CHECK: vsqrtps {rd-sae}, %zmm29, %zmm28
+// CHECK: encoding: [0x62,0x01,0x7c,0x38,0x51,0xe5]
+ vsqrtps {rd-sae}, %zmm29, %zmm28
+// CHECK: vsqrtps {rz-sae}, %zmm29, %zmm28
+// CHECK: encoding: [0x62,0x01,0x7c,0x78,0x51,0xe5]
+ vsqrtps {rz-sae}, %zmm29, %zmm28
// CHECK: vsubpd %zmm9, %zmm12, %zmm9
// CHECK: encoding: [0x62,0x51,0x9d,0x48,0x5c,0xc9]
vsubpd %zmm9, %zmm12, %zmm9
@@ -6052,6 +6084,66 @@ valignq $2, 0x100(%rsp), %zmm0, %zmm1
// CHECK: encoding: [0x62,0xf3,0xfd,0x49,0x03,0xcb,0x03]
valignq $3, %zmm3, %zmm0, %zmm1 {%k1}
+// CHECK: valignq $171, %zmm23, %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x23,0xdd,0x48,0x03,0xe7,0xab]
+ valignq $0xab, %zmm23, %zmm4, %zmm28
+// CHECK: valignq $171, %zmm23, %zmm4, %zmm28 {%k3}
+// CHECK: encoding: [0x62,0x23,0xdd,0x4b,0x03,0xe7,0xab]
+ valignq $0xab, %zmm23, %zmm4, %zmm28 {%k3}
+// CHECK: valignq $171, %zmm23, %zmm4, %zmm28 {%k3} {z}
+// CHECK: encoding: [0x62,0x23,0xdd,0xcb,0x03,0xe7,0xab]
+ valignq $0xab, %zmm23, %zmm4, %zmm28 {%k3} {z}
+// CHECK: valignq $123, %zmm23, %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x23,0xdd,0x48,0x03,0xe7,0x7b]
+ valignq $0x7b, %zmm23, %zmm4, %zmm28
+// CHECK: valignq $123, (%rcx), %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x48,0x03,0x21,0x7b]
+ valignq $0x7b, (%rcx), %zmm4, %zmm28
+// CHECK: valignq $123, 291(%rax,%r14,8), %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x23,0xdd,0x48,0x03,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ valignq $0x7b, 291(%rax,%r14,8), %zmm4, %zmm28
+// CHECK: valignq $123, (%rcx){1to8}, %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x58,0x03,0x21,0x7b]
+ valignq $0x7b, (%rcx){1to8}, %zmm4, %zmm28
+// CHECK: valignq $123, 8128(%rdx), %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x48,0x03,0x62,0x7f,0x7b]
+ valignq $0x7b, 8128(%rdx), %zmm4, %zmm28
+// CHECK: valignq $123, 8192(%rdx), %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x48,0x03,0xa2,0x00,0x20,0x00,0x00,0x7b]
+ valignq $0x7b, 8192(%rdx), %zmm4, %zmm28
+// CHECK: valignq $123, -8192(%rdx), %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x48,0x03,0x62,0x80,0x7b]
+ valignq $0x7b, -8192(%rdx), %zmm4, %zmm28
+// CHECK: valignq $123, -8256(%rdx), %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x48,0x03,0xa2,0xc0,0xdf,0xff,0xff,0x7b]
+ valignq $0x7b, -8256(%rdx), %zmm4, %zmm28
+// CHECK: valignq $123, 1016(%rdx){1to8}, %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x58,0x03,0x62,0x7f,0x7b]
+ valignq $0x7b, 1016(%rdx){1to8}, %zmm4, %zmm28
+// CHECK: valignq $123, 1024(%rdx){1to8}, %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x58,0x03,0xa2,0x00,0x04,0x00,0x00,0x7b]
+ valignq $0x7b, 1024(%rdx){1to8}, %zmm4, %zmm28
+// CHECK: valignq $123, -1024(%rdx){1to8}, %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x58,0x03,0x62,0x80,0x7b]
+ valignq $0x7b, -1024(%rdx){1to8}, %zmm4, %zmm28
+// CHECK: valignq $123, -1032(%rdx){1to8}, %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x58,0x03,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+ valignq $0x7b, -1032(%rdx){1to8}, %zmm4, %zmm28
// CHECK: vextractf32x4 $3
// CHECK: encoding: [0x62,0xf3,0x7d,0x49,0x19,0xd9,0x03]
vextractf32x4 $3, %zmm3, %xmm1 {%k1}
@@ -7724,3 +7816,1000 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xf1,0x95,0x50,0x72,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
vpsraq $123, -1032(%rdx){1to8}, %zmm29
+// CHECK: vfixupimmps $171, %zmm2, %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x54,0xfa,0xab]
+ vfixupimmps $0xab, %zmm2, %zmm26, %zmm15
+// CHECK: vfixupimmps $171, %zmm2, %zmm26, %zmm15 {%k4}
+// CHECK: encoding: [0x62,0x73,0x2d,0x44,0x54,0xfa,0xab]
+ vfixupimmps $0xab, %zmm2, %zmm26, %zmm15 {%k4}
+// CHECK: vfixupimmps $171, %zmm2, %zmm26, %zmm15 {%k4} {z}
+// CHECK: encoding: [0x62,0x73,0x2d,0xc4,0x54,0xfa,0xab]
+ vfixupimmps $0xab, %zmm2, %zmm26, %zmm15 {%k4} {z}
+// CHECK: vfixupimmps $171,{sae}, %zmm2, %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x73,0x2d,0x10,0x54,0xfa,0xab]
+ vfixupimmps $0xab,{sae}, %zmm2, %zmm26, %zmm15
+// CHECK: vfixupimmps $123, %zmm2, %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x54,0xfa,0x7b]
+ vfixupimmps $0x7b, %zmm2, %zmm26, %zmm15
+// CHECK: vfixupimmps $123,{sae}, %zmm2, %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x73,0x2d,0x10,0x54,0xfa,0x7b]
+ vfixupimmps $0x7b,{sae}, %zmm2, %zmm26, %zmm15
+// CHECK: vfixupimmps $123, (%rcx), %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x54,0x39,0x7b]
+ vfixupimmps $0x7b, (%rcx), %zmm26, %zmm15
+// CHECK: vfixupimmps $123, 291(%rax,%r14,8), %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x33,0x2d,0x40,0x54,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfixupimmps $0x7b, 291(%rax,%r14,8), %zmm26, %zmm15
+// CHECK: vfixupimmps $123, (%rcx){1to16}, %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x73,0x2d,0x50,0x54,0x39,0x7b]
+ vfixupimmps $0x7b, (%rcx){1to16}, %zmm26, %zmm15
+// CHECK: vfixupimmps $123, 8128(%rdx), %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x54,0x7a,0x7f,0x7b]
+ vfixupimmps $0x7b, 8128(%rdx), %zmm26, %zmm15
+// CHECK: vfixupimmps $123, 8192(%rdx), %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x54,0xba,0x00,0x20,0x00,0x00,0x7b]
+ vfixupimmps $0x7b, 8192(%rdx), %zmm26, %zmm15
+// CHECK: vfixupimmps $123, -8192(%rdx), %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x54,0x7a,0x80,0x7b]
+ vfixupimmps $0x7b, -8192(%rdx), %zmm26, %zmm15
+// CHECK: vfixupimmps $123, -8256(%rdx), %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x54,0xba,0xc0,0xdf,0xff,0xff,0x7b]
+ vfixupimmps $0x7b, -8256(%rdx), %zmm26, %zmm15
+// CHECK: vfixupimmps $123, 508(%rdx){1to16}, %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x73,0x2d,0x50,0x54,0x7a,0x7f,0x7b]
+ vfixupimmps $0x7b, 508(%rdx){1to16}, %zmm26, %zmm15
+// CHECK: vfixupimmps $123, 512(%rdx){1to16}, %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x73,0x2d,0x50,0x54,0xba,0x00,0x02,0x00,0x00,0x7b]
+ vfixupimmps $0x7b, 512(%rdx){1to16}, %zmm26, %zmm15
+// CHECK: vfixupimmps $123, -512(%rdx){1to16}, %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x73,0x2d,0x50,0x54,0x7a,0x80,0x7b]
+ vfixupimmps $0x7b, -512(%rdx){1to16}, %zmm26, %zmm15
+// CHECK: vfixupimmps $123, -516(%rdx){1to16}, %zmm26, %zmm15
+// CHECK: encoding: [0x62,0x73,0x2d,0x50,0x54,0xba,0xfc,0xfd,0xff,0xff,0x7b]
+ vfixupimmps $0x7b, -516(%rdx){1to16}, %zmm26, %zmm15
+// CHECK: vfixupimmpd $171, %zmm19, %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x33,0xd5,0x40,0x54,0xcb,0xab]
+ vfixupimmpd $0xab, %zmm19, %zmm21, %zmm9
+// CHECK: vfixupimmpd $171, %zmm19, %zmm21, %zmm9 {%k2}
+// CHECK: encoding: [0x62,0x33,0xd5,0x42,0x54,0xcb,0xab]
+ vfixupimmpd $0xab, %zmm19, %zmm21, %zmm9 {%k2}
+// CHECK: vfixupimmpd $171, %zmm19, %zmm21, %zmm9 {%k2} {z}
+// CHECK: encoding: [0x62,0x33,0xd5,0xc2,0x54,0xcb,0xab]
+ vfixupimmpd $0xab, %zmm19, %zmm21, %zmm9 {%k2} {z}
+// CHECK: vfixupimmpd $171,{sae}, %zmm19, %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x33,0xd5,0x10,0x54,0xcb,0xab]
+ vfixupimmpd $0xab,{sae}, %zmm19, %zmm21, %zmm9
+// CHECK: vfixupimmpd $123, %zmm19, %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x33,0xd5,0x40,0x54,0xcb,0x7b]
+ vfixupimmpd $0x7b, %zmm19, %zmm21, %zmm9
+// CHECK: vfixupimmpd $123,{sae}, %zmm19, %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x33,0xd5,0x10,0x54,0xcb,0x7b]
+ vfixupimmpd $0x7b,{sae}, %zmm19, %zmm21, %zmm9
+// CHECK: vfixupimmpd $123, (%rcx), %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x73,0xd5,0x40,0x54,0x09,0x7b]
+ vfixupimmpd $0x7b, (%rcx), %zmm21, %zmm9
+// CHECK: vfixupimmpd $123, 291(%rax,%r14,8), %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x33,0xd5,0x40,0x54,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfixupimmpd $0x7b, 291(%rax,%r14,8), %zmm21, %zmm9
+// CHECK: vfixupimmpd $123, (%rcx){1to8}, %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x73,0xd5,0x50,0x54,0x09,0x7b]
+ vfixupimmpd $0x7b, (%rcx){1to8}, %zmm21, %zmm9
+// CHECK: vfixupimmpd $123, 8128(%rdx), %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x73,0xd5,0x40,0x54,0x4a,0x7f,0x7b]
+ vfixupimmpd $0x7b, 8128(%rdx), %zmm21, %zmm9
+// CHECK: vfixupimmpd $123, 8192(%rdx), %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x73,0xd5,0x40,0x54,0x8a,0x00,0x20,0x00,0x00,0x7b]
+ vfixupimmpd $0x7b, 8192(%rdx), %zmm21, %zmm9
+// CHECK: vfixupimmpd $123, -8192(%rdx), %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x73,0xd5,0x40,0x54,0x4a,0x80,0x7b]
+ vfixupimmpd $0x7b, -8192(%rdx), %zmm21, %zmm9
+// CHECK: vfixupimmpd $123, -8256(%rdx), %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x73,0xd5,0x40,0x54,0x8a,0xc0,0xdf,0xff,0xff,0x7b]
+ vfixupimmpd $0x7b, -8256(%rdx), %zmm21, %zmm9
+// CHECK: vfixupimmpd $123, 1016(%rdx){1to8}, %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x73,0xd5,0x50,0x54,0x4a,0x7f,0x7b]
+ vfixupimmpd $0x7b, 1016(%rdx){1to8}, %zmm21, %zmm9
+// CHECK: vfixupimmpd $123, 1024(%rdx){1to8}, %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x73,0xd5,0x50,0x54,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vfixupimmpd $0x7b, 1024(%rdx){1to8}, %zmm21, %zmm9
+// CHECK: vfixupimmpd $123, -1024(%rdx){1to8}, %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x73,0xd5,0x50,0x54,0x4a,0x80,0x7b]
+ vfixupimmpd $0x7b, -1024(%rdx){1to8}, %zmm21, %zmm9
+// CHECK: vfixupimmpd $123, -1032(%rdx){1to8}, %zmm21, %zmm9
+// CHECK: encoding: [0x62,0x73,0xd5,0x50,0x54,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vfixupimmpd $0x7b, -1032(%rdx){1to8}, %zmm21, %zmm9
+// CHECK: vfixupimmss $171, %xmm28, %xmm18, %xmm15
+// CHECK: encoding: [0x62,0x13,0x6d,0x00,0x55,0xfc,0xab]
+ vfixupimmss $0xab, %xmm28, %xmm18, %xmm15
+// CHECK: vfixupimmss $171, %xmm28, %xmm18, %xmm15 {%k5}
+// CHECK: encoding: [0x62,0x13,0x6d,0x05,0x55,0xfc,0xab]
+ vfixupimmss $0xab, %xmm28, %xmm18, %xmm15 {%k5}
+// CHECK: vfixupimmss $171, %xmm28, %xmm18, %xmm15 {%k5} {z}
+// CHECK: encoding: [0x62,0x13,0x6d,0x85,0x55,0xfc,0xab]
+ vfixupimmss $0xab, %xmm28, %xmm18, %xmm15 {%k5} {z}
+// CHECK: vfixupimmss $171,{sae}, %xmm28, %xmm18, %xmm15
+// CHECK: encoding: [0x62,0x13,0x6d,0x10,0x55,0xfc,0xab]
+ vfixupimmss $0xab,{sae}, %xmm28, %xmm18, %xmm15
+// CHECK: vfixupimmss $123, %xmm28, %xmm18, %xmm15
+// CHECK: encoding: [0x62,0x13,0x6d,0x00,0x55,0xfc,0x7b]
+ vfixupimmss $0x7b, %xmm28, %xmm18, %xmm15
+// CHECK: vfixupimmss $123,{sae}, %xmm28, %xmm18, %xmm15
+// CHECK: encoding: [0x62,0x13,0x6d,0x10,0x55,0xfc,0x7b]
+ vfixupimmss $0x7b,{sae}, %xmm28, %xmm18, %xmm15
+// CHECK: vfixupimmss $123, (%rcx), %xmm18, %xmm15
+// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0x39,0x7b]
+ vfixupimmss $0x7b, (%rcx), %xmm18, %xmm15
+// CHECK: vfixupimmss $123, 291(%rax,%r14,8), %xmm18, %xmm15
+// CHECK: encoding: [0x62,0x33,0x6d,0x00,0x55,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfixupimmss $0x7b, 291(%rax,%r14,8), %xmm18, %xmm15
+// CHECK: vfixupimmss $123, 508(%rdx), %xmm18, %xmm15
+// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0x7a,0x7f,0x7b]
+ vfixupimmss $0x7b, 508(%rdx), %xmm18, %xmm15
+// CHECK: vfixupimmss $123, 512(%rdx), %xmm18, %xmm15
+// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0xba,0x00,0x02,0x00,0x00,0x7b]
+ vfixupimmss $0x7b, 512(%rdx), %xmm18, %xmm15
+// CHECK: vfixupimmss $123, -512(%rdx), %xmm18, %xmm15
+// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0x7a,0x80,0x7b]
+ vfixupimmss $0x7b, -512(%rdx), %xmm18, %xmm15
+// CHECK: vfixupimmss $123, -516(%rdx), %xmm18, %xmm15
+// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0xba,0xfc,0xfd,0xff,0xff,0x7b]
+ vfixupimmss $0x7b, -516(%rdx), %xmm18, %xmm15
+// CHECK: vfixupimmsd $171, %xmm5, %xmm26, %xmm13
+// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0xed,0xab]
+ vfixupimmsd $0xab, %xmm5, %xmm26, %xmm13
+// CHECK: vfixupimmsd $171, %xmm5, %xmm26, %xmm13 {%k6}
+// CHECK: encoding: [0x62,0x73,0xad,0x06,0x55,0xed,0xab]
+ vfixupimmsd $0xab, %xmm5, %xmm26, %xmm13 {%k6}
+// CHECK: vfixupimmsd $171, %xmm5, %xmm26, %xmm13 {%k6} {z}
+// CHECK: encoding: [0x62,0x73,0xad,0x86,0x55,0xed,0xab]
+ vfixupimmsd $0xab, %xmm5, %xmm26, %xmm13 {%k6} {z}
+// CHECK: vfixupimmsd $171,{sae}, %xmm5, %xmm26, %xmm13
+// CHECK: encoding: [0x62,0x73,0xad,0x10,0x55,0xed,0xab]
+ vfixupimmsd $0xab,{sae}, %xmm5, %xmm26, %xmm13
+// CHECK: vfixupimmsd $123, %xmm5, %xmm26, %xmm13
+// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0xed,0x7b]
+ vfixupimmsd $0x7b, %xmm5, %xmm26, %xmm13
+// CHECK: vfixupimmsd $123,{sae}, %xmm5, %xmm26, %xmm13
+// CHECK: encoding: [0x62,0x73,0xad,0x10,0x55,0xed,0x7b]
+ vfixupimmsd $0x7b,{sae}, %xmm5, %xmm26, %xmm13
+// CHECK: vfixupimmsd $123, (%rcx), %xmm26, %xmm13
+// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0x29,0x7b]
+ vfixupimmsd $0x7b, (%rcx), %xmm26, %xmm13
+// CHECK: vfixupimmsd $123, 291(%rax,%r14,8), %xmm26, %xmm13
+// CHECK: encoding: [0x62,0x33,0xad,0x00,0x55,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfixupimmsd $0x7b, 291(%rax,%r14,8), %xmm26, %xmm13
+// CHECK: vfixupimmsd $123, 1016(%rdx), %xmm26, %xmm13
+// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0x6a,0x7f,0x7b]
+ vfixupimmsd $0x7b, 1016(%rdx), %xmm26, %xmm13
+// CHECK: vfixupimmsd $123, 1024(%rdx), %xmm26, %xmm13
+// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0xaa,0x00,0x04,0x00,0x00,0x7b]
+ vfixupimmsd $0x7b, 1024(%rdx), %xmm26, %xmm13
+// CHECK: vfixupimmsd $123, -1024(%rdx), %xmm26, %xmm13
+// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0x6a,0x80,0x7b]
+ vfixupimmsd $0x7b, -1024(%rdx), %xmm26, %xmm13
+// CHECK: vfixupimmsd $123, -1032(%rdx), %xmm26, %xmm13
+// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
+ vfixupimmsd $0x7b, -1032(%rdx), %xmm26, %xmm13
+// CHECK: vpshufd $171, %zmm25, %zmm19
+// CHECK: encoding: [0x62,0x81,0x7d,0x48,0x70,0xd9,0xab]
+ vpshufd $171, %zmm25, %zmm19
+// CHECK: vpshufd $171, %zmm25, %zmm19 {%k6}
+// CHECK: encoding: [0x62,0x81,0x7d,0x4e,0x70,0xd9,0xab]
+ vpshufd $171, %zmm25, %zmm19 {%k6}
+// CHECK: vpshufd $171, %zmm25, %zmm19 {%k6} {z}
+// CHECK: encoding: [0x62,0x81,0x7d,0xce,0x70,0xd9,0xab]
+ vpshufd $171, %zmm25, %zmm19 {%k6} {z}
+// CHECK: vpshufd $123, %zmm25, %zmm19
+// CHECK: encoding: [0x62,0x81,0x7d,0x48,0x70,0xd9,0x7b]
+ vpshufd $123, %zmm25, %zmm19
+// CHECK: vpshufd $123, (%rcx), %zmm19
+// CHECK: encoding: [0x62,0xe1,0x7d,0x48,0x70,0x19,0x7b]
+ vpshufd $123, (%rcx), %zmm19
+// CHECK: vpshufd $123, 291(%rax,%r14,8), %zmm19
+// CHECK: encoding: [0x62,0xa1,0x7d,0x48,0x70,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpshufd $123, 291(%rax,%r14,8), %zmm19
+// CHECK: vpshufd $123, (%rcx){1to16}, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x7d,0x58,0x70,0x19,0x7b]
+ vpshufd $123, (%rcx){1to16}, %zmm19
+// CHECK: vpshufd $123, 8128(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe1,0x7d,0x48,0x70,0x5a,0x7f,0x7b]
+ vpshufd $123, 8128(%rdx), %zmm19
+// CHECK: vpshufd $123, 8192(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe1,0x7d,0x48,0x70,0x9a,0x00,0x20,0x00,0x00,0x7b]
+ vpshufd $123, 8192(%rdx), %zmm19
+// CHECK: vpshufd $123, -8192(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe1,0x7d,0x48,0x70,0x5a,0x80,0x7b]
+ vpshufd $123, -8192(%rdx), %zmm19
+// CHECK: vpshufd $123, -8256(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe1,0x7d,0x48,0x70,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
+ vpshufd $123, -8256(%rdx), %zmm19
+// CHECK: vpshufd $123, 508(%rdx){1to16}, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x7d,0x58,0x70,0x5a,0x7f,0x7b]
+ vpshufd $123, 508(%rdx){1to16}, %zmm19
+// CHECK: vpshufd $123, 512(%rdx){1to16}, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x7d,0x58,0x70,0x9a,0x00,0x02,0x00,0x00,0x7b]
+ vpshufd $123, 512(%rdx){1to16}, %zmm19
+// CHECK: vpshufd $123, -512(%rdx){1to16}, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x7d,0x58,0x70,0x5a,0x80,0x7b]
+ vpshufd $123, -512(%rdx){1to16}, %zmm19
+// CHECK: vpshufd $123, -516(%rdx){1to16}, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x7d,0x58,0x70,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+ vpshufd $123, -516(%rdx){1to16}, %zmm19
+// CHECK: vgetexppd %zmm25, %zmm14
+// CHECK: encoding: [0x62,0x12,0xfd,0x48,0x42,0xf1]
+ vgetexppd %zmm25, %zmm14
+// CHECK: vgetexppd %zmm25, %zmm14 {%k5}
+// CHECK: encoding: [0x62,0x12,0xfd,0x4d,0x42,0xf1]
+ vgetexppd %zmm25, %zmm14 {%k5}
+// CHECK: vgetexppd %zmm25, %zmm14 {%k5} {z}
+// CHECK: encoding: [0x62,0x12,0xfd,0xcd,0x42,0xf1]
+ vgetexppd %zmm25, %zmm14 {%k5} {z}
+// CHECK: vgetexppd {sae}, %zmm25, %zmm14
+// CHECK: encoding: [0x62,0x12,0xfd,0x18,0x42,0xf1]
+ vgetexppd {sae}, %zmm25, %zmm14
+// CHECK: vgetexppd (%rcx), %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0x31]
+ vgetexppd (%rcx), %zmm14
+// CHECK: vgetexppd 291(%rax,%r14,8), %zmm14
+// CHECK: encoding: [0x62,0x32,0xfd,0x48,0x42,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vgetexppd 291(%rax,%r14,8), %zmm14
+// CHECK: vgetexppd (%rcx){1to8}, %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0x31]
+ vgetexppd (%rcx){1to8}, %zmm14
+// CHECK: vgetexppd 8128(%rdx), %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0x72,0x7f]
+ vgetexppd 8128(%rdx), %zmm14
+// CHECK: vgetexppd 8192(%rdx), %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0xb2,0x00,0x20,0x00,0x00]
+ vgetexppd 8192(%rdx), %zmm14
+// CHECK: vgetexppd -8192(%rdx), %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0x72,0x80]
+ vgetexppd -8192(%rdx), %zmm14
+// CHECK: vgetexppd -8256(%rdx), %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0xb2,0xc0,0xdf,0xff,0xff]
+ vgetexppd -8256(%rdx), %zmm14
+// CHECK: vgetexppd 1016(%rdx){1to8}, %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0x72,0x7f]
+ vgetexppd 1016(%rdx){1to8}, %zmm14
+// CHECK: vgetexppd 1024(%rdx){1to8}, %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0xb2,0x00,0x04,0x00,0x00]
+ vgetexppd 1024(%rdx){1to8}, %zmm14
+// CHECK: vgetexppd -1024(%rdx){1to8}, %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0x72,0x80]
+ vgetexppd -1024(%rdx){1to8}, %zmm14
+// CHECK: vgetexppd -1032(%rdx){1to8}, %zmm14
+// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0xb2,0xf8,0xfb,0xff,0xff]
+ vgetexppd -1032(%rdx){1to8}, %zmm14
+// CHECK: vgetexpps %zmm6, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0xce]
+ vgetexpps %zmm6, %zmm1
+// CHECK: vgetexpps %zmm6, %zmm1 {%k3}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x4b,0x42,0xce]
+ vgetexpps %zmm6, %zmm1 {%k3}
+// CHECK: vgetexpps %zmm6, %zmm1 {%k3} {z}
+// CHECK: encoding: [0x62,0xf2,0x7d,0xcb,0x42,0xce]
+ vgetexpps %zmm6, %zmm1 {%k3} {z}
+// CHECK: vgetexpps {sae}, %zmm6, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x18,0x42,0xce]
+ vgetexpps {sae}, %zmm6, %zmm1
+// CHECK: vgetexpps (%rcx), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x09]
+ vgetexpps (%rcx), %zmm1
+// CHECK: vgetexpps 291(%rax,%r14,8), %zmm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x42,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vgetexpps 291(%rax,%r14,8), %zmm1
+// CHECK: vgetexpps (%rcx){1to16}, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x09]
+ vgetexpps (%rcx){1to16}, %zmm1
+// CHECK: vgetexpps 8128(%rdx), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x4a,0x7f]
+ vgetexpps 8128(%rdx), %zmm1
+// CHECK: vgetexpps 8192(%rdx), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x8a,0x00,0x20,0x00,0x00]
+ vgetexpps 8192(%rdx), %zmm1
+// CHECK: vgetexpps -8192(%rdx), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x4a,0x80]
+ vgetexpps -8192(%rdx), %zmm1
+// CHECK: vgetexpps -8256(%rdx), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x8a,0xc0,0xdf,0xff,0xff]
+ vgetexpps -8256(%rdx), %zmm1
+// CHECK: vgetexpps 508(%rdx){1to16}, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x4a,0x7f]
+ vgetexpps 508(%rdx){1to16}, %zmm1
+// CHECK: vgetexpps 512(%rdx){1to16}, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x8a,0x00,0x02,0x00,0x00]
+ vgetexpps 512(%rdx){1to16}, %zmm1
+// CHECK: vgetexpps -512(%rdx){1to16}, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x4a,0x80]
+ vgetexpps -512(%rdx){1to16}, %zmm1
+// CHECK: vgetexpps -516(%rdx){1to16}, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x8a,0xfc,0xfd,0xff,0xff]
+ vgetexpps -516(%rdx){1to16}, %zmm1
+// CHECK: vshuff32x4 $171, %zmm3, %zmm24, %zmm6
+// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0xf3,0xab]
+ vshuff32x4 $171, %zmm3, %zmm24, %zmm6
+// CHECK: vshuff32x4 $171, %zmm3, %zmm24, %zmm6 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x3d,0x42,0x23,0xf3,0xab]
+ vshuff32x4 $171, %zmm3, %zmm24, %zmm6 {%k2}
+// CHECK: vshuff32x4 $171, %zmm3, %zmm24, %zmm6 {%k2} {z}
+// CHECK: encoding: [0x62,0xf3,0x3d,0xc2,0x23,0xf3,0xab]
+ vshuff32x4 $171, %zmm3, %zmm24, %zmm6 {%k2} {z}
+// CHECK: vshuff32x4 $123, %zmm3, %zmm24, %zmm6
+// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0xf3,0x7b]
+ vshuff32x4 $123, %zmm3, %zmm24, %zmm6
+// CHECK: vshuff32x4 $123, (%rcx), %zmm24, %zmm6
+// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0x31,0x7b]
+ vshuff32x4 $123, (%rcx), %zmm24, %zmm6
+// CHECK: vshuff32x4 $123, 291(%rax,%r14,8), %zmm24, %zmm6
+// CHECK: encoding: [0x62,0xb3,0x3d,0x40,0x23,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vshuff32x4 $123, 291(%rax,%r14,8), %zmm24, %zmm6
+// CHECK: vshuff32x4 $123, (%rcx){1to16}, %zmm24, %zmm6
+// CHECK: encoding: [0x62,0xf3,0x3d,0x50,0x23,0x31,0x7b]
+ vshuff32x4 $123, (%rcx){1to16}, %zmm24, %zmm6
+// CHECK: vshuff32x4 $123, 8128(%rdx), %zmm24, %zmm6
+// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0x72,0x7f,0x7b]
+ vshuff32x4 $123, 8128(%rdx), %zmm24, %zmm6
+// CHECK: vshuff32x4 $123, 8192(%rdx), %zmm24, %zmm6
+// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0xb2,0x00,0x20,0x00,0x00,0x7b]
+ vshuff32x4 $123, 8192(%rdx), %zmm24, %zmm6
+// CHECK: vshuff32x4 $123, -8192(%rdx), %zmm24, %zmm6
+// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0x72,0x80,0x7b]
+ vshuff32x4 $123, -8192(%rdx), %zmm24, %zmm6
+// CHECK: vshuff32x4 $123, -8256(%rdx), %zmm24, %zmm6
+// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0xb2,0xc0,0xdf,0xff,0xff,0x7b]
+ vshuff32x4 $123, -8256(%rdx), %zmm24, %zmm6
+// CHECK: vshuff32x4 $123, 508(%rdx){1to16}, %zmm24, %zmm6
+// CHECK: encoding: [0x62,0xf3,0x3d,0x50,0x23,0x72,0x7f,0x7b]
+ vshuff32x4 $123, 508(%rdx){1to16}, %zmm24, %zmm6
+// CHECK: vshuff32x4 $123, 512(%rdx){1to16}, %zmm24, %zmm6
+// CHECK: encoding: [0x62,0xf3,0x3d,0x50,0x23,0xb2,0x00,0x02,0x00,0x00,0x7b]
+ vshuff32x4 $123, 512(%rdx){1to16}, %zmm24, %zmm6
+// CHECK: vshuff32x4 $123, -512(%rdx){1to16}, %zmm24, %zmm6
+// CHECK: encoding: [0x62,0xf3,0x3d,0x50,0x23,0x72,0x80,0x7b]
+ vshuff32x4 $123, -512(%rdx){1to16}, %zmm24, %zmm6
+// CHECK: vshuff32x4 $123, -516(%rdx){1to16}, %zmm24, %zmm6
+// CHECK: encoding: [0x62,0xf3,0x3d,0x50,0x23,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
+ vshuff32x4 $123, -516(%rdx){1to16}, %zmm24, %zmm6
+// CHECK: vshuff64x2 $171, %zmm11, %zmm25, %zmm15
+// CHECK: encoding: [0x62,0x53,0xb5,0x40,0x23,0xfb,0xab]
+ vshuff64x2 $171, %zmm11, %zmm25, %zmm15
+// CHECK: vshuff64x2 $171, %zmm11, %zmm25, %zmm15 {%k2}
+// CHECK: encoding: [0x62,0x53,0xb5,0x42,0x23,0xfb,0xab]
+ vshuff64x2 $171, %zmm11, %zmm25, %zmm15 {%k2}
+// CHECK: vshuff64x2 $171, %zmm11, %zmm25, %zmm15 {%k2} {z}
+// CHECK: encoding: [0x62,0x53,0xb5,0xc2,0x23,0xfb,0xab]
+ vshuff64x2 $171, %zmm11, %zmm25, %zmm15 {%k2} {z}
+// CHECK: vshuff64x2 $123, %zmm11, %zmm25, %zmm15
+// CHECK: encoding: [0x62,0x53,0xb5,0x40,0x23,0xfb,0x7b]
+ vshuff64x2 $123, %zmm11, %zmm25, %zmm15
+// CHECK: vshuff64x2 $123, (%rcx), %zmm25, %zmm15
+// CHECK: encoding: [0x62,0x73,0xb5,0x40,0x23,0x39,0x7b]
+ vshuff64x2 $123, (%rcx), %zmm25, %zmm15
+// CHECK: vshuff64x2 $123, 291(%rax,%r14,8), %zmm25, %zmm15
+// CHECK: encoding: [0x62,0x33,0xb5,0x40,0x23,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vshuff64x2 $123, 291(%rax,%r14,8), %zmm25, %zmm15
+// CHECK: vshuff64x2 $123, (%rcx){1to8}, %zmm25, %zmm15
+// CHECK: encoding: [0x62,0x73,0xb5,0x50,0x23,0x39,0x7b]
+ vshuff64x2 $123, (%rcx){1to8}, %zmm25, %zmm15
+// CHECK: vshuff64x2 $123, 8128(%rdx), %zmm25, %zmm15
+// CHECK: encoding: [0x62,0x73,0xb5,0x40,0x23,0x7a,0x7f,0x7b]
+ vshuff64x2 $123, 8128(%rdx), %zmm25, %zmm15
+// CHECK: vshuff64x2 $123, 8192(%rdx), %zmm25, %zmm15
+// CHECK: encoding: [0x62,0x73,0xb5,0x40,0x23,0xba,0x00,0x20,0x00,0x00,0x7b]
+ vshuff64x2 $123, 8192(%rdx), %zmm25, %zmm15
+// CHECK: vshuff64x2 $123, -8192(%rdx), %zmm25, %zmm15
+// CHECK: encoding: [0x62,0x73,0xb5,0x40,0x23,0x7a,0x80,0x7b]
+ vshuff64x2 $123, -8192(%rdx), %zmm25, %zmm15
+// CHECK: vshuff64x2 $123, -8256(%rdx), %zmm25, %zmm15
+// CHECK: encoding: [0x62,0x73,0xb5,0x40,0x23,0xba,0xc0,0xdf,0xff,0xff,0x7b]
+ vshuff64x2 $123, -8256(%rdx), %zmm25, %zmm15
+// CHECK: vshuff64x2 $123, 1016(%rdx){1to8}, %zmm25, %zmm15
+// CHECK: encoding: [0x62,0x73,0xb5,0x50,0x23,0x7a,0x7f,0x7b]
+ vshuff64x2 $123, 1016(%rdx){1to8}, %zmm25, %zmm15
+// CHECK: vshuff64x2 $123, 1024(%rdx){1to8}, %zmm25, %zmm15
+// CHECK: encoding: [0x62,0x73,0xb5,0x50,0x23,0xba,0x00,0x04,0x00,0x00,0x7b]
+ vshuff64x2 $123, 1024(%rdx){1to8}, %zmm25, %zmm15
+// CHECK: vshuff64x2 $123, -1024(%rdx){1to8}, %zmm25, %zmm15
+// CHECK: encoding: [0x62,0x73,0xb5,0x50,0x23,0x7a,0x80,0x7b]
+ vshuff64x2 $123, -1024(%rdx){1to8}, %zmm25, %zmm15
+// CHECK: vshuff64x2 $123, -1032(%rdx){1to8}, %zmm25, %zmm15
+// CHECK: encoding: [0x62,0x73,0xb5,0x50,0x23,0xba,0xf8,0xfb,0xff,0xff,0x7b]
+ vshuff64x2 $123, -1032(%rdx){1to8}, %zmm25, %zmm15
+// CHECK: vshufi32x4 $171, %zmm25, %zmm28, %zmm1
+// CHECK: encoding: [0x62,0x93,0x1d,0x40,0x43,0xc9,0xab]
+ vshufi32x4 $171, %zmm25, %zmm28, %zmm1
+// CHECK: vshufi32x4 $171, %zmm25, %zmm28, %zmm1 {%k4}
+// CHECK: encoding: [0x62,0x93,0x1d,0x44,0x43,0xc9,0xab]
+ vshufi32x4 $171, %zmm25, %zmm28, %zmm1 {%k4}
+// CHECK: vshufi32x4 $171, %zmm25, %zmm28, %zmm1 {%k4} {z}
+// CHECK: encoding: [0x62,0x93,0x1d,0xc4,0x43,0xc9,0xab]
+ vshufi32x4 $171, %zmm25, %zmm28, %zmm1 {%k4} {z}
+// CHECK: vshufi32x4 $123, %zmm25, %zmm28, %zmm1
+// CHECK: encoding: [0x62,0x93,0x1d,0x40,0x43,0xc9,0x7b]
+ vshufi32x4 $123, %zmm25, %zmm28, %zmm1
+// CHECK: vshufi32x4 $123, (%rcx), %zmm28, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x1d,0x40,0x43,0x09,0x7b]
+ vshufi32x4 $123, (%rcx), %zmm28, %zmm1
+// CHECK: vshufi32x4 $123, 291(%rax,%r14,8), %zmm28, %zmm1
+// CHECK: encoding: [0x62,0xb3,0x1d,0x40,0x43,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vshufi32x4 $123, 291(%rax,%r14,8), %zmm28, %zmm1
+// CHECK: vshufi32x4 $123, (%rcx){1to16}, %zmm28, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x1d,0x50,0x43,0x09,0x7b]
+ vshufi32x4 $123, (%rcx){1to16}, %zmm28, %zmm1
+// CHECK: vshufi32x4 $123, 8128(%rdx), %zmm28, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x1d,0x40,0x43,0x4a,0x7f,0x7b]
+ vshufi32x4 $123, 8128(%rdx), %zmm28, %zmm1
+// CHECK: vshufi32x4 $123, 8192(%rdx), %zmm28, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x1d,0x40,0x43,0x8a,0x00,0x20,0x00,0x00,0x7b]
+ vshufi32x4 $123, 8192(%rdx), %zmm28, %zmm1
+// CHECK: vshufi32x4 $123, -8192(%rdx), %zmm28, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x1d,0x40,0x43,0x4a,0x80,0x7b]
+ vshufi32x4 $123, -8192(%rdx), %zmm28, %zmm1
+// CHECK: vshufi32x4 $123, -8256(%rdx), %zmm28, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x1d,0x40,0x43,0x8a,0xc0,0xdf,0xff,0xff,0x7b]
+ vshufi32x4 $123, -8256(%rdx), %zmm28, %zmm1
+// CHECK: vshufi32x4 $123, 508(%rdx){1to16}, %zmm28, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x1d,0x50,0x43,0x4a,0x7f,0x7b]
+ vshufi32x4 $123, 508(%rdx){1to16}, %zmm28, %zmm1
+// CHECK: vshufi32x4 $123, 512(%rdx){1to16}, %zmm28, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x1d,0x50,0x43,0x8a,0x00,0x02,0x00,0x00,0x7b]
+ vshufi32x4 $123, 512(%rdx){1to16}, %zmm28, %zmm1
+// CHECK: vshufi32x4 $123, -512(%rdx){1to16}, %zmm28, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x1d,0x50,0x43,0x4a,0x80,0x7b]
+ vshufi32x4 $123, -512(%rdx){1to16}, %zmm28, %zmm1
+// CHECK: vshufi32x4 $123, -516(%rdx){1to16}, %zmm28, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x1d,0x50,0x43,0x8a,0xfc,0xfd,0xff,0xff,0x7b]
+ vshufi32x4 $123, -516(%rdx){1to16}, %zmm28, %zmm1
+// CHECK: vshufi64x2 $171, %zmm19, %zmm16, %zmm3
+// CHECK: encoding: [0x62,0xb3,0xfd,0x40,0x43,0xdb,0xab]
+ vshufi64x2 $171, %zmm19, %zmm16, %zmm3
+// CHECK: vshufi64x2 $171, %zmm19, %zmm16, %zmm3 {%k7}
+// CHECK: encoding: [0x62,0xb3,0xfd,0x47,0x43,0xdb,0xab]
+ vshufi64x2 $171, %zmm19, %zmm16, %zmm3 {%k7}
+// CHECK: vshufi64x2 $171, %zmm19, %zmm16, %zmm3 {%k7} {z}
+// CHECK: encoding: [0x62,0xb3,0xfd,0xc7,0x43,0xdb,0xab]
+ vshufi64x2 $171, %zmm19, %zmm16, %zmm3 {%k7} {z}
+// CHECK: vshufi64x2 $123, %zmm19, %zmm16, %zmm3
+// CHECK: encoding: [0x62,0xb3,0xfd,0x40,0x43,0xdb,0x7b]
+ vshufi64x2 $123, %zmm19, %zmm16, %zmm3
+// CHECK: vshufi64x2 $123, (%rcx), %zmm16, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x40,0x43,0x19,0x7b]
+ vshufi64x2 $123, (%rcx), %zmm16, %zmm3
+// CHECK: vshufi64x2 $123, 291(%rax,%r14,8), %zmm16, %zmm3
+// CHECK: encoding: [0x62,0xb3,0xfd,0x40,0x43,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vshufi64x2 $123, 291(%rax,%r14,8), %zmm16, %zmm3
+// CHECK: vshufi64x2 $123, (%rcx){1to8}, %zmm16, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x50,0x43,0x19,0x7b]
+ vshufi64x2 $123, (%rcx){1to8}, %zmm16, %zmm3
+// CHECK: vshufi64x2 $123, 8128(%rdx), %zmm16, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x40,0x43,0x5a,0x7f,0x7b]
+ vshufi64x2 $123, 8128(%rdx), %zmm16, %zmm3
+// CHECK: vshufi64x2 $123, 8192(%rdx), %zmm16, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x40,0x43,0x9a,0x00,0x20,0x00,0x00,0x7b]
+ vshufi64x2 $123, 8192(%rdx), %zmm16, %zmm3
+// CHECK: vshufi64x2 $123, -8192(%rdx), %zmm16, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x40,0x43,0x5a,0x80,0x7b]
+ vshufi64x2 $123, -8192(%rdx), %zmm16, %zmm3
+// CHECK: vshufi64x2 $123, -8256(%rdx), %zmm16, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x40,0x43,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
+ vshufi64x2 $123, -8256(%rdx), %zmm16, %zmm3
+// CHECK: vshufi64x2 $123, 1016(%rdx){1to8}, %zmm16, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x50,0x43,0x5a,0x7f,0x7b]
+ vshufi64x2 $123, 1016(%rdx){1to8}, %zmm16, %zmm3
+// CHECK: vshufi64x2 $123, 1024(%rdx){1to8}, %zmm16, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x50,0x43,0x9a,0x00,0x04,0x00,0x00,0x7b]
+ vshufi64x2 $123, 1024(%rdx){1to8}, %zmm16, %zmm3
+// CHECK: vshufi64x2 $123, -1024(%rdx){1to8}, %zmm16, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x50,0x43,0x5a,0x80,0x7b]
+ vshufi64x2 $123, -1024(%rdx){1to8}, %zmm16, %zmm3
+// CHECK: vshufi64x2 $123, -1032(%rdx){1to8}, %zmm16, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x50,0x43,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+ vshufi64x2 $123, -1032(%rdx){1to8}, %zmm16, %zmm3
+// CHECK: vpermps %zmm24, %zmm2, %zmm4
+// CHECK: encoding: [0x62,0x92,0x6d,0x48,0x16,0xe0]
+ vpermps %zmm24, %zmm2, %zmm4
+// CHECK: vpermps %zmm24, %zmm2, %zmm4 {%k4}
+// CHECK: encoding: [0x62,0x92,0x6d,0x4c,0x16,0xe0]
+ vpermps %zmm24, %zmm2, %zmm4 {%k4}
+// CHECK: vpermps %zmm24, %zmm2, %zmm4 {%k4} {z}
+// CHECK: encoding: [0x62,0x92,0x6d,0xcc,0x16,0xe0]
+ vpermps %zmm24, %zmm2, %zmm4 {%k4} {z}
+// CHECK: vpermps (%rcx), %zmm2, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x16,0x21]
+ vpermps (%rcx), %zmm2, %zmm4
+// CHECK: vpermps 291(%rax,%r14,8), %zmm2, %zmm4
+// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x16,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpermps 291(%rax,%r14,8), %zmm2, %zmm4
+// CHECK: vpermps (%rcx){1to16}, %zmm2, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x58,0x16,0x21]
+ vpermps (%rcx){1to16}, %zmm2, %zmm4
+// CHECK: vpermps 8128(%rdx), %zmm2, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x16,0x62,0x7f]
+ vpermps 8128(%rdx), %zmm2, %zmm4
+// CHECK: vpermps 8192(%rdx), %zmm2, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x16,0xa2,0x00,0x20,0x00,0x00]
+ vpermps 8192(%rdx), %zmm2, %zmm4
+// CHECK: vpermps -8192(%rdx), %zmm2, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x16,0x62,0x80]
+ vpermps -8192(%rdx), %zmm2, %zmm4
+// CHECK: vpermps -8256(%rdx), %zmm2, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x16,0xa2,0xc0,0xdf,0xff,0xff]
+ vpermps -8256(%rdx), %zmm2, %zmm4
+// CHECK: vpermps 508(%rdx){1to16}, %zmm2, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x58,0x16,0x62,0x7f]
+ vpermps 508(%rdx){1to16}, %zmm2, %zmm4
+// CHECK: vpermps 512(%rdx){1to16}, %zmm2, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x58,0x16,0xa2,0x00,0x02,0x00,0x00]
+ vpermps 512(%rdx){1to16}, %zmm2, %zmm4
+// CHECK: vpermps -512(%rdx){1to16}, %zmm2, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x58,0x16,0x62,0x80]
+ vpermps -512(%rdx){1to16}, %zmm2, %zmm4
+// CHECK: vpermps -516(%rdx){1to16}, %zmm2, %zmm4
+// CHECK: encoding: [0x62,0xf2,0x6d,0x58,0x16,0xa2,0xfc,0xfd,0xff,0xff]
+ vpermps -516(%rdx){1to16}, %zmm2, %zmm4
+// CHECK: vpermq $171, %zmm4, %zmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x00,0xcc,0xab]
+ vpermq $171, %zmm4, %zmm25
+// CHECK: vpermq $171, %zmm4, %zmm25 {%k6}
+// CHECK: encoding: [0x62,0x63,0xfd,0x4e,0x00,0xcc,0xab]
+ vpermq $171, %zmm4, %zmm25 {%k6}
+// CHECK: vpermq $171, %zmm4, %zmm25 {%k6} {z}
+// CHECK: encoding: [0x62,0x63,0xfd,0xce,0x00,0xcc,0xab]
+ vpermq $171, %zmm4, %zmm25 {%k6} {z}
+// CHECK: vpermq $123, %zmm4, %zmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x00,0xcc,0x7b]
+ vpermq $123, %zmm4, %zmm25
+// CHECK: vpermq $123, (%rcx), %zmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x00,0x09,0x7b]
+ vpermq $123, (%rcx), %zmm25
+// CHECK: vpermq $123, 291(%rax,%r14,8), %zmm25
+// CHECK: encoding: [0x62,0x23,0xfd,0x48,0x00,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpermq $123, 291(%rax,%r14,8), %zmm25
+// CHECK: vpermq $123, (%rcx){1to8}, %zmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x58,0x00,0x09,0x7b]
+ vpermq $123, (%rcx){1to8}, %zmm25
+// CHECK: vpermq $123, 8128(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x00,0x4a,0x7f,0x7b]
+ vpermq $123, 8128(%rdx), %zmm25
+// CHECK: vpermq $123, 8192(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x00,0x8a,0x00,0x20,0x00,0x00,0x7b]
+ vpermq $123, 8192(%rdx), %zmm25
+// CHECK: vpermq $123, -8192(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x00,0x4a,0x80,0x7b]
+ vpermq $123, -8192(%rdx), %zmm25
+// CHECK: vpermq $123, -8256(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x00,0x8a,0xc0,0xdf,0xff,0xff,0x7b]
+ vpermq $123, -8256(%rdx), %zmm25
+// CHECK: vpermq $123, 1016(%rdx){1to8}, %zmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x58,0x00,0x4a,0x7f,0x7b]
+ vpermq $123, 1016(%rdx){1to8}, %zmm25
+// CHECK: vpermq $123, 1024(%rdx){1to8}, %zmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x58,0x00,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vpermq $123, 1024(%rdx){1to8}, %zmm25
+// CHECK: vpermq $123, -1024(%rdx){1to8}, %zmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x58,0x00,0x4a,0x80,0x7b]
+ vpermq $123, -1024(%rdx){1to8}, %zmm25
+// CHECK: vpermq $123, -1032(%rdx){1to8}, %zmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x58,0x00,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vpermq $123, -1032(%rdx){1to8}, %zmm25
+// CHECK: vpermq %zmm22, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x36,0xee]
+ vpermq %zmm22, %zmm23, %zmm21
+// CHECK: vpermq %zmm22, %zmm23, %zmm21 {%k1}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x41,0x36,0xee]
+ vpermq %zmm22, %zmm23, %zmm21 {%k1}
+// CHECK: vpermq %zmm22, %zmm23, %zmm21 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0xc5,0xc1,0x36,0xee]
+ vpermq %zmm22, %zmm23, %zmm21 {%k1} {z}
+// CHECK: vpermq (%rcx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x36,0x29]
+ vpermq (%rcx), %zmm23, %zmm21
+// CHECK: vpermq 291(%rax,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x36,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpermq 291(%rax,%r14,8), %zmm23, %zmm21
+// CHECK: vpermq (%rcx){1to8}, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x50,0x36,0x29]
+ vpermq (%rcx){1to8}, %zmm23, %zmm21
+// CHECK: vpermq 8128(%rdx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x36,0x6a,0x7f]
+ vpermq 8128(%rdx), %zmm23, %zmm21
+// CHECK: vpermq 8192(%rdx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x36,0xaa,0x00,0x20,0x00,0x00]
+ vpermq 8192(%rdx), %zmm23, %zmm21
+// CHECK: vpermq -8192(%rdx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x36,0x6a,0x80]
+ vpermq -8192(%rdx), %zmm23, %zmm21
+// CHECK: vpermq -8256(%rdx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x36,0xaa,0xc0,0xdf,0xff,0xff]
+ vpermq -8256(%rdx), %zmm23, %zmm21
+// CHECK: vpermq 1016(%rdx){1to8}, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x50,0x36,0x6a,0x7f]
+ vpermq 1016(%rdx){1to8}, %zmm23, %zmm21
+// CHECK: vpermq 1024(%rdx){1to8}, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x50,0x36,0xaa,0x00,0x04,0x00,0x00]
+ vpermq 1024(%rdx){1to8}, %zmm23, %zmm21
+// CHECK: vpermq -1024(%rdx){1to8}, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x50,0x36,0x6a,0x80]
+ vpermq -1024(%rdx){1to8}, %zmm23, %zmm21
+// CHECK: vpermq -1032(%rdx){1to8}, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x50,0x36,0xaa,0xf8,0xfb,0xff,0xff]
+ vpermq -1032(%rdx){1to8}, %zmm23, %zmm21
+// CHECK: vpermpd %zmm18, %zmm29, %zmm26
+// CHECK: encoding: [0x62,0x22,0x95,0x40,0x16,0xd2]
+ vpermpd %zmm18, %zmm29, %zmm26
+// CHECK: vpermpd %zmm18, %zmm29, %zmm26 {%k6}
+// CHECK: encoding: [0x62,0x22,0x95,0x46,0x16,0xd2]
+ vpermpd %zmm18, %zmm29, %zmm26 {%k6}
+// CHECK: vpermpd %zmm18, %zmm29, %zmm26 {%k6} {z}
+// CHECK: encoding: [0x62,0x22,0x95,0xc6,0x16,0xd2]
+ vpermpd %zmm18, %zmm29, %zmm26 {%k6} {z}
+// CHECK: vpermpd (%rcx), %zmm29, %zmm26
+// CHECK: encoding: [0x62,0x62,0x95,0x40,0x16,0x11]
+ vpermpd (%rcx), %zmm29, %zmm26
+// CHECK: vpermpd 291(%rax,%r14,8), %zmm29, %zmm26
+// CHECK: encoding: [0x62,0x22,0x95,0x40,0x16,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpermpd 291(%rax,%r14,8), %zmm29, %zmm26
+// CHECK: vpermpd (%rcx){1to8}, %zmm29, %zmm26
+// CHECK: encoding: [0x62,0x62,0x95,0x50,0x16,0x11]
+ vpermpd (%rcx){1to8}, %zmm29, %zmm26
+// CHECK: vpermpd 8128(%rdx), %zmm29, %zmm26
+// CHECK: encoding: [0x62,0x62,0x95,0x40,0x16,0x52,0x7f]
+ vpermpd 8128(%rdx), %zmm29, %zmm26
+// CHECK: vpermpd 8192(%rdx), %zmm29, %zmm26
+// CHECK: encoding: [0x62,0x62,0x95,0x40,0x16,0x92,0x00,0x20,0x00,0x00]
+ vpermpd 8192(%rdx), %zmm29, %zmm26
+// CHECK: vpermpd -8192(%rdx), %zmm29, %zmm26
+// CHECK: encoding: [0x62,0x62,0x95,0x40,0x16,0x52,0x80]
+ vpermpd -8192(%rdx), %zmm29, %zmm26
+// CHECK: vpermpd -8256(%rdx), %zmm29, %zmm26
+// CHECK: encoding: [0x62,0x62,0x95,0x40,0x16,0x92,0xc0,0xdf,0xff,0xff]
+ vpermpd -8256(%rdx), %zmm29, %zmm26
+// CHECK: vpermpd 1016(%rdx){1to8}, %zmm29, %zmm26
+// CHECK: encoding: [0x62,0x62,0x95,0x50,0x16,0x52,0x7f]
+ vpermpd 1016(%rdx){1to8}, %zmm29, %zmm26
+// CHECK: vpermpd 1024(%rdx){1to8}, %zmm29, %zmm26
+// CHECK: encoding: [0x62,0x62,0x95,0x50,0x16,0x92,0x00,0x04,0x00,0x00]
+ vpermpd 1024(%rdx){1to8}, %zmm29, %zmm26
+// CHECK: vpermpd -1024(%rdx){1to8}, %zmm29, %zmm26
+// CHECK: encoding: [0x62,0x62,0x95,0x50,0x16,0x52,0x80]
+ vpermpd -1024(%rdx){1to8}, %zmm29, %zmm26
+// CHECK: vpermpd -1032(%rdx){1to8}, %zmm29, %zmm26
+// CHECK: encoding: [0x62,0x62,0x95,0x50,0x16,0x92,0xf8,0xfb,0xff,0xff]
+ vpermpd -1032(%rdx){1to8}, %zmm29, %zmm26
+// CHECK: vpermpd $171, %zmm27, %zmm3
+// CHECK: encoding: [0x62,0x93,0xfd,0x48,0x01,0xdb,0xab]
+ vpermpd $0xab, %zmm27, %zmm3
+// CHECK: vpermpd $171, %zmm27, %zmm3 {%k2}
+// CHECK: encoding: [0x62,0x93,0xfd,0x4a,0x01,0xdb,0xab]
+ vpermpd $0xab, %zmm27, %zmm3 {%k2}
+// CHECK: vpermpd $171, %zmm27, %zmm3 {%k2} {z}
+// CHECK: encoding: [0x62,0x93,0xfd,0xca,0x01,0xdb,0xab]
+ vpermpd $0xab, %zmm27, %zmm3 {%k2} {z}
+// CHECK: vpermpd $123, %zmm27, %zmm3
+// CHECK: encoding: [0x62,0x93,0xfd,0x48,0x01,0xdb,0x7b]
+ vpermpd $0x7b, %zmm27, %zmm3
+// CHECK: vpermpd $123, (%rcx), %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x01,0x19,0x7b]
+ vpermpd $0x7b, (%rcx), %zmm3
+// CHECK: vpermpd $123, 291(%rax,%r14,8), %zmm3
+// CHECK: encoding: [0x62,0xb3,0xfd,0x48,0x01,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpermpd $0x7b, 291(%rax,%r14,8), %zmm3
+// CHECK: vpermpd $123, (%rcx){1to8}, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x01,0x19,0x7b]
+ vpermpd $0x7b, (%rcx){1to8}, %zmm3
+// CHECK: vpermpd $123, 8128(%rdx), %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x01,0x5a,0x7f,0x7b]
+ vpermpd $0x7b, 8128(%rdx), %zmm3
+// CHECK: vpermpd $123, 8192(%rdx), %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x01,0x9a,0x00,0x20,0x00,0x00,0x7b]
+ vpermpd $0x7b, 8192(%rdx), %zmm3
+// CHECK: vpermpd $123, -8192(%rdx), %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x01,0x5a,0x80,0x7b]
+ vpermpd $0x7b, -8192(%rdx), %zmm3
+// CHECK: vpermpd $123, -8256(%rdx), %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x01,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
+ vpermpd $0x7b, -8256(%rdx), %zmm3
+// CHECK: vpermpd $123, 1016(%rdx){1to8}, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x01,0x5a,0x7f,0x7b]
+ vpermpd $0x7b, 1016(%rdx){1to8}, %zmm3
+// CHECK: vpermpd $123, 1024(%rdx){1to8}, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x01,0x9a,0x00,0x04,0x00,0x00,0x7b]
+ vpermpd $0x7b, 1024(%rdx){1to8}, %zmm3
+// CHECK: vpermpd $123, -1024(%rdx){1to8}, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x01,0x5a,0x80,0x7b]
+ vpermpd $0x7b, -1024(%rdx){1to8}, %zmm3
+// CHECK: vpermpd $123, -1032(%rdx){1to8}, %zmm3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x01,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+ vpermpd $0x7b, -1032(%rdx){1to8}, %zmm3
+// CHECK: vpermd %zmm9, %zmm28, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x1d,0x40,0x36,0xf1]
+ vpermd %zmm9, %zmm28, %zmm22
+// CHECK: vpermd %zmm9, %zmm28, %zmm22 {%k1}
+// CHECK: encoding: [0x62,0xc2,0x1d,0x41,0x36,0xf1]
+ vpermd %zmm9, %zmm28, %zmm22 {%k1}
+// CHECK: vpermd %zmm9, %zmm28, %zmm22 {%k1} {z}
+// CHECK: encoding: [0x62,0xc2,0x1d,0xc1,0x36,0xf1]
+ vpermd %zmm9, %zmm28, %zmm22 {%k1} {z}
+// CHECK: vpermd (%rcx), %zmm28, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x40,0x36,0x31]
+ vpermd (%rcx), %zmm28, %zmm22
+// CHECK: vpermd 291(%rax,%r14,8), %zmm28, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x1d,0x40,0x36,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpermd 291(%rax,%r14,8), %zmm28, %zmm22
+// CHECK: vpermd (%rcx){1to16}, %zmm28, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x50,0x36,0x31]
+ vpermd (%rcx){1to16}, %zmm28, %zmm22
+// CHECK: vpermd 8128(%rdx), %zmm28, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x40,0x36,0x72,0x7f]
+ vpermd 8128(%rdx), %zmm28, %zmm22
+// CHECK: vpermd 8192(%rdx), %zmm28, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x40,0x36,0xb2,0x00,0x20,0x00,0x00]
+ vpermd 8192(%rdx), %zmm28, %zmm22
+// CHECK: vpermd -8192(%rdx), %zmm28, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x40,0x36,0x72,0x80]
+ vpermd -8192(%rdx), %zmm28, %zmm22
+// CHECK: vpermd -8256(%rdx), %zmm28, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x40,0x36,0xb2,0xc0,0xdf,0xff,0xff]
+ vpermd -8256(%rdx), %zmm28, %zmm22
+// CHECK: vpermd 508(%rdx){1to16}, %zmm28, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x50,0x36,0x72,0x7f]
+ vpermd 508(%rdx){1to16}, %zmm28, %zmm22
+// CHECK: vpermd 512(%rdx){1to16}, %zmm28, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x50,0x36,0xb2,0x00,0x02,0x00,0x00]
+ vpermd 512(%rdx){1to16}, %zmm28, %zmm22
+// CHECK: vpermd -512(%rdx){1to16}, %zmm28, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x50,0x36,0x72,0x80]
+ vpermd -512(%rdx){1to16}, %zmm28, %zmm22
+// CHECK: vpermd -516(%rdx){1to16}, %zmm28, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x1d,0x50,0x36,0xb2,0xfc,0xfd,0xff,0xff]
+ vpermd -516(%rdx){1to16}, %zmm28, %zmm22
diff --git a/test/MC/X86/hex-immediates.s b/test/MC/X86/hex-immediates.s
new file mode 100644
index 0000000..03f3042
--- /dev/null
+++ b/test/MC/X86/hex-immediates.s
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -filetype=obj %s -triple=x86_64-apple-darwin9 | llvm-objdump -d --print-imm-hex - | FileCheck %s
+# CHECK: movabsq $0x7fffffffffffffff, %rcx
+movabsq $0x7fffffffffffffff, %rcx
+# CHECK: leaq 0x3e2(%rip), %rdi
+leaq 0x3e2(%rip), %rdi
+# CHECK: subq $0x40, %rsp
+subq $0x40, %rsp
+# CHECK: leal (,%r14,4), %eax
+leal (,%r14,4), %eax
diff --git a/test/MC/X86/intel-syntax-avx512.s b/test/MC/X86/intel-syntax-avx512.s
index ffdbd20..86a1af8 100644
--- a/test/MC/X86/intel-syntax-avx512.s
+++ b/test/MC/X86/intel-syntax-avx512.s
@@ -161,15 +161,98 @@ vaddpd zmm1,zmm1,zmm2,{rz-sae}
vcmpps k2,zmm17,DWORD PTR [rdx-0x204]{1to16},0x7b
+// CHECK: vfixupimmss xmm15 , xmm18, xmm28, 171
+// CHECK: encoding: [0x62,0x13,0x6d,0x00,0x55,0xfc,0xab]
+ vfixupimmss xmm15,xmm18,xmm28,0xab
+// CHECK: vfixupimmss xmm15 {k5}, xmm18, xmm28, 171
+// CHECK: encoding: [0x62,0x13,0x6d,0x05,0x55,0xfc,0xab]
+ vfixupimmss xmm15{k5},xmm18,xmm28,0xab
+// CHECK: vfixupimmss xmm15 {k5} {z}, xmm18, xmm28, 171
+// CHECK: encoding: [0x62,0x13,0x6d,0x85,0x55,0xfc,0xab]
+ vfixupimmss xmm15{k5} {z},xmm18,xmm28,0xab
+// CHECK: vfixupimmss xmm15 , xmm18, xmm28,{sae}, 171
+// CHECK: encoding: [0x62,0x13,0x6d,0x10,0x55,0xfc,0xab]
+ vfixupimmss xmm15,xmm18,xmm28,{sae},0xab
+// CHECK: vfixupimmss xmm15 , xmm18, xmm28, 123
+// CHECK: encoding: [0x62,0x13,0x6d,0x00,0x55,0xfc,0x7b]
+ vfixupimmss xmm15,xmm18,xmm28,0x7b
+// CHECK: vfixupimmss xmm15 , xmm18, xmm28,{sae}, 123
+// CHECK: encoding: [0x62,0x13,0x6d,0x10,0x55,0xfc,0x7b]
+ vfixupimmss xmm15,xmm18,xmm28,{sae},0x7b
+// CHECK: vfixupimmss xmm15 , xmm18, dword ptr [rcx], 123
+// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0x39,0x7b]
+ vfixupimmss xmm15,xmm18,DWORD PTR [rcx],0x7b
+// CHECK: vfixupimmss xmm15 , xmm18, dword ptr [rax + 8*r14 + 291], 123
+// CHECK: encoding: [0x62,0x33,0x6d,0x00,0x55,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfixupimmss xmm15,xmm18,DWORD PTR [rax+r14*8+0x123],0x7b
+// CHECK: vfixupimmss xmm15 , xmm18, dword ptr [rdx + 508], 123
+// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0x7a,0x7f,0x7b]
+ vfixupimmss xmm15,xmm18,DWORD PTR [rdx+0x1fc],0x7b
+// CHECK: vfixupimmss xmm15 , xmm18, dword ptr [rdx + 512], 123
+// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0xba,0x00,0x02,0x00,0x00,0x7b]
+ vfixupimmss xmm15,xmm18,DWORD PTR [rdx+0x200],0x7b
+// CHECK: vfixupimmss xmm15 , xmm18, dword ptr [rdx - 512], 123
+// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0x7a,0x80,0x7b]
+ vfixupimmss xmm15,xmm18,DWORD PTR [rdx-0x200],0x7b
+// CHECK: vfixupimmss xmm15 , xmm18, dword ptr [rdx - 516], 123
+// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0xba,0xfc,0xfd,0xff,0xff,0x7b]
+ vfixupimmss xmm15,xmm18,DWORD PTR [rdx-0x204],0x7b
+// CHECK: vfixupimmsd xmm13 , xmm26, xmm5, 171
+// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0xed,0xab]
+ vfixupimmsd xmm13,xmm26,xmm5,0xab
+// CHECK: vfixupimmsd xmm13 {k6}, xmm26, xmm5, 171
+// CHECK: encoding: [0x62,0x73,0xad,0x06,0x55,0xed,0xab]
+ vfixupimmsd xmm13{k6},xmm26,xmm5,0xab
+// CHECK: vfixupimmsd xmm13 {k6} {z}, xmm26, xmm5, 171
+// CHECK: encoding: [0x62,0x73,0xad,0x86,0x55,0xed,0xab]
+ vfixupimmsd xmm13{k6} {z},xmm26,xmm5,0xab
+// CHECK: vfixupimmsd xmm13 , xmm26, xmm5,{sae}, 171
+// CHECK: encoding: [0x62,0x73,0xad,0x10,0x55,0xed,0xab]
+ vfixupimmsd xmm13,xmm26,xmm5,{sae},0xab
+// CHECK: vfixupimmsd xmm13 , xmm26, xmm5, 123
+// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0xed,0x7b]
+ vfixupimmsd xmm13,xmm26,xmm5,0x7b
+// CHECK: vfixupimmsd xmm13 , xmm26, xmm5,{sae}, 123
+// CHECK: encoding: [0x62,0x73,0xad,0x10,0x55,0xed,0x7b]
+ vfixupimmsd xmm13,xmm26,xmm5,{sae},0x7b
+// CHECK: vfixupimmsd xmm13 , xmm26, qword ptr [rcx], 123
+// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0x29,0x7b]
+ vfixupimmsd xmm13,xmm26,QWORD PTR [rcx],0x7b
+// CHECK: vfixupimmsd xmm13 , xmm26, qword ptr [rax + 8*r14 + 291], 123
+// CHECK: encoding: [0x62,0x33,0xad,0x00,0x55,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfixupimmsd xmm13,xmm26,QWORD PTR [rax+r14*8+0x123],0x7b
+// CHECK: vfixupimmsd xmm13 , xmm26, qword ptr [rdx + 1016], 123
+// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0x6a,0x7f,0x7b]
+ vfixupimmsd xmm13,xmm26,QWORD PTR [rdx+0x3f8],0x7b
+// CHECK: vfixupimmsd xmm13 , xmm26, qword ptr [rdx + 1024], 123
+// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0xaa,0x00,0x04,0x00,0x00,0x7b]
+ vfixupimmsd xmm13,xmm26,QWORD PTR [rdx+0x400],0x7b
+// CHECK: vfixupimmsd xmm13 , xmm26, qword ptr [rdx - 1024], 123
+// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0x6a,0x80,0x7b]
+ vfixupimmsd xmm13,xmm26,QWORD PTR [rdx-0x400],0x7b
+// CHECK: vfixupimmsd xmm13 , xmm26, qword ptr [rdx - 1032], 123
+// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
+ vfixupimmsd xmm13,xmm26,QWORD PTR [rdx-0x408],0x7b
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
index fce0c65..30fe6c8 100644
--- a/test/MC/X86/intel-syntax.s
+++ b/test/MC/X86/intel-syntax.s
@@ -662,3 +662,6 @@ frstor dword ptr [eax]
// CHECK: fnsave (%eax)
// CHECK: fxrstor (%eax)
// CHECK: frstor (%eax)
+// CHECK: cmpnless %xmm1, %xmm0
+cmpnless xmm0, xmm1
diff --git a/test/MC/X86/mpx-encodings.s b/test/MC/X86/mpx-encodings.s
new file mode 100644
index 0000000..6fe4e0f
--- /dev/null
+++ b/test/MC/X86/mpx-encodings.s
@@ -0,0 +1,38 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl --show-encoding %s | FileCheck %s
+// CHECK: bndmk (%rax), %bnd0
+// CHECK: encoding: [0xf3,0x48,0x0f,0x1b,0x00]
+bndmk (%rax), %bnd0
+// CHECK: bndmk 1024(%rax), %bnd1
+// CHECK: encoding: [0xf3,0x48,0x0f,0x1b,0x88,0x00,0x04,0x00,0x00]
+bndmk 1024(%rax), %bnd1
+// CHECK: bndmov %bnd2, %bnd1
+// CHECK: encoding: [0x66,0x0f,0x1b,0xd1]
+bndmov %bnd2, %bnd1
+// CHECK: bndmov %bnd1, 1024(%r9)
+// CHECK: encoding: [0x66,0x49,0x0f,0x1b,0x89,0x00,0x04,0x00,0x00]
+bndmov %bnd1, 1024(%r9)
+// CHECK: bndstx %bnd1, 1024(%rax)
+// CHECK: encoding: [0x0f,0x1b,0x88,0x00,0x04,0x00,0x00]
+bndstx %bnd1, 1024(%rax)
+// CHECK: bndldx 1024(%r8), %bnd1
+// CHECK: encoding: [0x41,0x0f,0x1a,0x88,0x00,0x04,0x00,0x00]
+bndldx 1024(%r8), %bnd1
+// CHECK: bndcl 121(%r10), %bnd1
+// CHECK: encoding: [0xf3,0x49,0x0f,0x1a,0x4a,0x79]
+bndcl 121(%r10), %bnd1
+// CHECK: bndcn 121(%rcx), %bnd3
+// CHECK: encoding: [0xf2,0x48,0x0f,0x1b,0x59,0x79]
+bndcn 121(%rcx), %bnd3
+// CHECK: bndcu %rdx, %bnd3
+// CHECK: encoding: [0xf2,0x48,0x0f,0x1a,0xda]
+bndcu %rdx, %bnd3
diff --git a/test/MC/X86/x86-64-avx512bw.s b/test/MC/X86/x86-64-avx512bw.s
index ba043da..b81e3ad 100644
--- a/test/MC/X86/x86-64-avx512bw.s
+++ b/test/MC/X86/x86-64-avx512bw.s
@@ -3308,3 +3308,38 @@
// CHECK: encoding: [0x62,0xe1,0x5d,0x40,0xd9,0xa2,0xc0,0xdf,0xff,0xff]
vpsubusw -8256(%rdx), %zmm20, %zmm20
+// CHECK: vpermw %zmm21, %zmm19, %zmm22
+// CHECK: encoding: [0x62,0xa2,0xe5,0x40,0x8d,0xf5]
+ vpermw %zmm21, %zmm19, %zmm22
+// CHECK: vpermw %zmm21, %zmm19, %zmm22 {%k6}
+// CHECK: encoding: [0x62,0xa2,0xe5,0x46,0x8d,0xf5]
+ vpermw %zmm21, %zmm19, %zmm22 {%k6}
+// CHECK: vpermw %zmm21, %zmm19, %zmm22 {%k6} {z}
+// CHECK: encoding: [0x62,0xa2,0xe5,0xc6,0x8d,0xf5]
+ vpermw %zmm21, %zmm19, %zmm22 {%k6} {z}
+// CHECK: vpermw (%rcx), %zmm19, %zmm22
+// CHECK: encoding: [0x62,0xe2,0xe5,0x40,0x8d,0x31]
+ vpermw (%rcx), %zmm19, %zmm22
+// CHECK: vpermw 291(%rax,%r14,8), %zmm19, %zmm22
+// CHECK: encoding: [0x62,0xa2,0xe5,0x40,0x8d,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpermw 291(%rax,%r14,8), %zmm19, %zmm22
+// CHECK: vpermw 8128(%rdx), %zmm19, %zmm22
+// CHECK: encoding: [0x62,0xe2,0xe5,0x40,0x8d,0x72,0x7f]
+ vpermw 8128(%rdx), %zmm19, %zmm22
+// CHECK: vpermw 8192(%rdx), %zmm19, %zmm22
+// CHECK: encoding: [0x62,0xe2,0xe5,0x40,0x8d,0xb2,0x00,0x20,0x00,0x00]
+ vpermw 8192(%rdx), %zmm19, %zmm22
+// CHECK: vpermw -8192(%rdx), %zmm19, %zmm22
+// CHECK: encoding: [0x62,0xe2,0xe5,0x40,0x8d,0x72,0x80]
+ vpermw -8192(%rdx), %zmm19, %zmm22
+// CHECK: vpermw -8256(%rdx), %zmm19, %zmm22
+// CHECK: encoding: [0x62,0xe2,0xe5,0x40,0x8d,0xb2,0xc0,0xdf,0xff,0xff]
+ vpermw -8256(%rdx), %zmm19, %zmm22
diff --git a/test/MC/X86/x86-64-avx512bw_vl.s b/test/MC/X86/x86-64-avx512bw_vl.s
index bd16b0e..0ba5e17 100644
--- a/test/MC/X86/x86-64-avx512bw_vl.s
+++ b/test/MC/X86/x86-64-avx512bw_vl.s
@@ -5775,3 +5775,164 @@
// CHECK: vpsubusw -4128(%rdx), %ymm25, %ymm27
// CHECK: encoding: [0x62,0x61,0x35,0x20,0xd9,0x9a,0xe0,0xef,0xff,0xff]
vpsubusw -4128(%rdx), %ymm25, %ymm27
+// CHECK: vpshufhw $171, %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x70,0xfb,0xab]
+ vpshufhw $171, %xmm19, %xmm23
+// CHECK: vpshufhw $171, %xmm19, %xmm23 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xfe,0x0f,0x70,0xfb,0xab]
+ vpshufhw $171, %xmm19, %xmm23 {%k7}
+// CHECK: vpshufhw $171, %xmm19, %xmm23 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xfe,0x8f,0x70,0xfb,0xab]
+ vpshufhw $171, %xmm19, %xmm23 {%k7} {z}
+// CHECK: vpshufhw $123, %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x70,0xfb,0x7b]
+ vpshufhw $123, %xmm19, %xmm23
+// CHECK: vpshufhw $123, (%rcx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x70,0x39,0x7b]
+ vpshufhw $123, (%rcx), %xmm23
+// CHECK: vpshufhw $123, 291(%rax,%r14,8), %xmm23
+// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x70,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpshufhw $123, 291(%rax,%r14,8), %xmm23
+// CHECK: vpshufhw $123, 2032(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x70,0x7a,0x7f,0x7b]
+ vpshufhw $123, 2032(%rdx), %xmm23
+// CHECK: vpshufhw $123, 2048(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x70,0xba,0x00,0x08,0x00,0x00,0x7b]
+ vpshufhw $123, 2048(%rdx), %xmm23
+// CHECK: vpshufhw $123, -2048(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x70,0x7a,0x80,0x7b]
+ vpshufhw $123, -2048(%rdx), %xmm23
+// CHECK: vpshufhw $123, -2064(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x70,0xba,0xf0,0xf7,0xff,0xff,0x7b]
+ vpshufhw $123, -2064(%rdx), %xmm23
+// CHECK: vpshufhw $171, %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x21,0xfe,0x28,0x70,0xe9,0xab]
+ vpshufhw $171, %ymm17, %ymm29
+// CHECK: vpshufhw $171, %ymm17, %ymm29 {%k7}
+// CHECK: encoding: [0x62,0x21,0xfe,0x2f,0x70,0xe9,0xab]
+ vpshufhw $171, %ymm17, %ymm29 {%k7}
+// CHECK: vpshufhw $171, %ymm17, %ymm29 {%k7} {z}
+// CHECK: encoding: [0x62,0x21,0xfe,0xaf,0x70,0xe9,0xab]
+ vpshufhw $171, %ymm17, %ymm29 {%k7} {z}
+// CHECK: vpshufhw $123, %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x21,0xfe,0x28,0x70,0xe9,0x7b]
+ vpshufhw $123, %ymm17, %ymm29
+// CHECK: vpshufhw $123, (%rcx), %ymm29
+// CHECK: encoding: [0x62,0x61,0xfe,0x28,0x70,0x29,0x7b]
+ vpshufhw $123, (%rcx), %ymm29
+// CHECK: vpshufhw $123, 291(%rax,%r14,8), %ymm29
+// CHECK: encoding: [0x62,0x21,0xfe,0x28,0x70,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpshufhw $123, 291(%rax,%r14,8), %ymm29
+// CHECK: vpshufhw $123, 4064(%rdx), %ymm29
+// CHECK: encoding: [0x62,0x61,0xfe,0x28,0x70,0x6a,0x7f,0x7b]
+ vpshufhw $123, 4064(%rdx), %ymm29
+// CHECK: vpshufhw $123, 4096(%rdx), %ymm29
+// CHECK: encoding: [0x62,0x61,0xfe,0x28,0x70,0xaa,0x00,0x10,0x00,0x00,0x7b]
+ vpshufhw $123, 4096(%rdx), %ymm29
+// CHECK: vpshufhw $123, -4096(%rdx), %ymm29
+// CHECK: encoding: [0x62,0x61,0xfe,0x28,0x70,0x6a,0x80,0x7b]
+ vpshufhw $123, -4096(%rdx), %ymm29
+// CHECK: vpshufhw $123, -4128(%rdx), %ymm29
+// CHECK: encoding: [0x62,0x61,0xfe,0x28,0x70,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+ vpshufhw $123, -4128(%rdx), %ymm29
+// CHECK: vpshuflw $171, %xmm27, %xmm30
+// CHECK: encoding: [0x62,0x01,0xff,0x08,0x70,0xf3,0xab]
+ vpshuflw $171, %xmm27, %xmm30
+// CHECK: vpshuflw $171, %xmm27, %xmm30 {%k6}
+// CHECK: encoding: [0x62,0x01,0xff,0x0e,0x70,0xf3,0xab]
+ vpshuflw $171, %xmm27, %xmm30 {%k6}
+// CHECK: vpshuflw $171, %xmm27, %xmm30 {%k6} {z}
+// CHECK: encoding: [0x62,0x01,0xff,0x8e,0x70,0xf3,0xab]
+ vpshuflw $171, %xmm27, %xmm30 {%k6} {z}
+// CHECK: vpshuflw $123, %xmm27, %xmm30
+// CHECK: encoding: [0x62,0x01,0xff,0x08,0x70,0xf3,0x7b]
+ vpshuflw $123, %xmm27, %xmm30
+// CHECK: vpshuflw $123, (%rcx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x70,0x31,0x7b]
+ vpshuflw $123, (%rcx), %xmm30
+// CHECK: vpshuflw $123, 291(%rax,%r14,8), %xmm30
+// CHECK: encoding: [0x62,0x21,0xff,0x08,0x70,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpshuflw $123, 291(%rax,%r14,8), %xmm30
+// CHECK: vpshuflw $123, 2032(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x70,0x72,0x7f,0x7b]
+ vpshuflw $123, 2032(%rdx), %xmm30
+// CHECK: vpshuflw $123, 2048(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x70,0xb2,0x00,0x08,0x00,0x00,0x7b]
+ vpshuflw $123, 2048(%rdx), %xmm30
+// CHECK: vpshuflw $123, -2048(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x70,0x72,0x80,0x7b]
+ vpshuflw $123, -2048(%rdx), %xmm30
+// CHECK: vpshuflw $123, -2064(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x70,0xb2,0xf0,0xf7,0xff,0xff,0x7b]
+ vpshuflw $123, -2064(%rdx), %xmm30
+// CHECK: vpshuflw $171, %ymm25, %ymm25
+// CHECK: encoding: [0x62,0x01,0xff,0x28,0x70,0xc9,0xab]
+ vpshuflw $171, %ymm25, %ymm25
+// CHECK: vpshuflw $171, %ymm25, %ymm25 {%k5}
+// CHECK: encoding: [0x62,0x01,0xff,0x2d,0x70,0xc9,0xab]
+ vpshuflw $171, %ymm25, %ymm25 {%k5}
+// CHECK: vpshuflw $171, %ymm25, %ymm25 {%k5} {z}
+// CHECK: encoding: [0x62,0x01,0xff,0xad,0x70,0xc9,0xab]
+ vpshuflw $171, %ymm25, %ymm25 {%k5} {z}
+// CHECK: vpshuflw $123, %ymm25, %ymm25
+// CHECK: encoding: [0x62,0x01,0xff,0x28,0x70,0xc9,0x7b]
+ vpshuflw $123, %ymm25, %ymm25
+// CHECK: vpshuflw $123, (%rcx), %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0x70,0x09,0x7b]
+ vpshuflw $123, (%rcx), %ymm25
+// CHECK: vpshuflw $123, 291(%rax,%r14,8), %ymm25
+// CHECK: encoding: [0x62,0x21,0xff,0x28,0x70,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpshuflw $123, 291(%rax,%r14,8), %ymm25
+// CHECK: vpshuflw $123, 4064(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0x70,0x4a,0x7f,0x7b]
+ vpshuflw $123, 4064(%rdx), %ymm25
+// CHECK: vpshuflw $123, 4096(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0x70,0x8a,0x00,0x10,0x00,0x00,0x7b]
+ vpshuflw $123, 4096(%rdx), %ymm25
+// CHECK: vpshuflw $123, -4096(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0x70,0x4a,0x80,0x7b]
+ vpshuflw $123, -4096(%rdx), %ymm25
+// CHECK: vpshuflw $123, -4128(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0x70,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+ vpshuflw $123, -4128(%rdx), %ymm25
diff --git a/test/MC/X86/x86-64-avx512dq.s b/test/MC/X86/x86-64-avx512dq.s
index 7d33abd..92656dd 100644
--- a/test/MC/X86/x86-64-avx512dq.s
+++ b/test/MC/X86/x86-64-avx512dq.s
@@ -1158,3 +1158,235 @@
// CHECK: vbroadcasti64x2 -2064(%rdx), %zmm20
// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x5a,0xa2,0xf0,0xf7,0xff,0xff]
vbroadcasti64x2 -2064(%rdx), %zmm20
+// CHECK: vrangepd $171, %zmm17, %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xa3,0xe5,0x40,0x50,0xc9,0xab]
+ vrangepd $0xab, %zmm17, %zmm19, %zmm17
+// CHECK: vrangepd $171, %zmm17, %zmm19, %zmm17 {%k6}
+// CHECK: encoding: [0x62,0xa3,0xe5,0x46,0x50,0xc9,0xab]
+ vrangepd $0xab, %zmm17, %zmm19, %zmm17 {%k6}
+// CHECK: vrangepd $171, %zmm17, %zmm19, %zmm17 {%k6} {z}
+// CHECK: encoding: [0x62,0xa3,0xe5,0xc6,0x50,0xc9,0xab]
+ vrangepd $0xab, %zmm17, %zmm19, %zmm17 {%k6} {z}
+// CHECK: vrangepd $171,{sae}, %zmm17, %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xa3,0xe5,0x10,0x50,0xc9,0xab]
+ vrangepd $0xab,{sae}, %zmm17, %zmm19, %zmm17
+// CHECK: vrangepd $123, %zmm17, %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xa3,0xe5,0x40,0x50,0xc9,0x7b]
+ vrangepd $0x7b, %zmm17, %zmm19, %zmm17
+// CHECK: vrangepd $123,{sae}, %zmm17, %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xa3,0xe5,0x10,0x50,0xc9,0x7b]
+ vrangepd $0x7b,{sae}, %zmm17, %zmm19, %zmm17
+// CHECK: vrangepd $123, (%rcx), %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xe3,0xe5,0x40,0x50,0x09,0x7b]
+ vrangepd $0x7b,(%rcx), %zmm19, %zmm17
+// CHECK: vrangepd $123, 291(%rax,%r14,8), %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xa3,0xe5,0x40,0x50,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrangepd $0x7b,291(%rax,%r14,8), %zmm19, %zmm17
+// CHECK: vrangepd $123, (%rcx){1to8}, %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xe3,0xe5,0x50,0x50,0x09,0x7b]
+ vrangepd $0x7b,(%rcx){1to8}, %zmm19, %zmm17
+// CHECK: vrangepd $123, 8128(%rdx), %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xe3,0xe5,0x40,0x50,0x4a,0x7f,0x7b]
+ vrangepd $0x7b,8128(%rdx), %zmm19, %zmm17
+// CHECK: vrangepd $123, 8192(%rdx), %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xe3,0xe5,0x40,0x50,0x8a,0x00,0x20,0x00,0x00,0x7b]
+ vrangepd $0x7b,8192(%rdx), %zmm19, %zmm17
+// CHECK: vrangepd $123, -8192(%rdx), %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xe3,0xe5,0x40,0x50,0x4a,0x80,0x7b]
+ vrangepd $0x7b,-8192(%rdx), %zmm19, %zmm17
+// CHECK: vrangepd $123, -8256(%rdx), %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xe3,0xe5,0x40,0x50,0x8a,0xc0,0xdf,0xff,0xff,0x7b]
+ vrangepd $0x7b,-8256(%rdx), %zmm19, %zmm17
+// CHECK: vrangepd $123, 1016(%rdx){1to8}, %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xe3,0xe5,0x50,0x50,0x4a,0x7f,0x7b]
+ vrangepd $0x7b,1016(%rdx){1to8}, %zmm19, %zmm17
+// CHECK: vrangepd $123, 1024(%rdx){1to8}, %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xe3,0xe5,0x50,0x50,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vrangepd $0x7b,1024(%rdx){1to8}, %zmm19, %zmm17
+// CHECK: vrangepd $123, -1024(%rdx){1to8}, %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xe3,0xe5,0x50,0x50,0x4a,0x80,0x7b]
+ vrangepd $0x7b,-1024(%rdx){1to8}, %zmm19, %zmm17
+// CHECK: vrangepd $123, -1032(%rdx){1to8}, %zmm19, %zmm17
+// CHECK: encoding: [0x62,0xe3,0xe5,0x50,0x50,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vrangepd $0x7b,-1032(%rdx){1to8}, %zmm19, %zmm17
+// CHECK: vrangeps $171, %zmm17, %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x23,0x55,0x40,0x50,0xc1,0xab]
+ vrangeps $0xab, %zmm17, %zmm21, %zmm24
+// CHECK: vrangeps $171, %zmm17, %zmm21, %zmm24 {%k6}
+// CHECK: encoding: [0x62,0x23,0x55,0x46,0x50,0xc1,0xab]
+ vrangeps $0xab, %zmm17, %zmm21, %zmm24 {%k6}
+// CHECK: vrangeps $171, %zmm17, %zmm21, %zmm24 {%k6} {z}
+// CHECK: encoding: [0x62,0x23,0x55,0xc6,0x50,0xc1,0xab]
+ vrangeps $0xab, %zmm17, %zmm21, %zmm24 {%k6} {z}
+// CHECK: vrangeps $171,{sae}, %zmm17, %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x23,0x55,0x10,0x50,0xc1,0xab]
+ vrangeps $0xab,{sae}, %zmm17, %zmm21, %zmm24
+// CHECK: vrangeps $123, %zmm17, %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x23,0x55,0x40,0x50,0xc1,0x7b]
+ vrangeps $0x7b, %zmm17, %zmm21, %zmm24
+// CHECK: vrangeps $123,{sae}, %zmm17, %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x23,0x55,0x10,0x50,0xc1,0x7b]
+ vrangeps $0x7b,{sae}, %zmm17, %zmm21, %zmm24
+// CHECK: vrangeps $123, (%rcx), %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x63,0x55,0x40,0x50,0x01,0x7b]
+ vrangeps $0x7b,(%rcx), %zmm21, %zmm24
+// CHECK: vrangeps $123, 291(%rax,%r14,8), %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x23,0x55,0x40,0x50,0x84,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrangeps $0x7b,291(%rax,%r14,8), %zmm21, %zmm24
+// CHECK: vrangeps $123, (%rcx){1to16}, %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x63,0x55,0x50,0x50,0x01,0x7b]
+ vrangeps $0x7b,(%rcx){1to16}, %zmm21, %zmm24
+// CHECK: vrangeps $123, 8128(%rdx), %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x63,0x55,0x40,0x50,0x42,0x7f,0x7b]
+ vrangeps $0x7b,8128(%rdx), %zmm21, %zmm24
+// CHECK: vrangeps $123, 8192(%rdx), %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x63,0x55,0x40,0x50,0x82,0x00,0x20,0x00,0x00,0x7b]
+ vrangeps $0x7b,8192(%rdx), %zmm21, %zmm24
+// CHECK: vrangeps $123, -8192(%rdx), %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x63,0x55,0x40,0x50,0x42,0x80,0x7b]
+ vrangeps $0x7b,-8192(%rdx), %zmm21, %zmm24
+// CHECK: vrangeps $123, -8256(%rdx), %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x63,0x55,0x40,0x50,0x82,0xc0,0xdf,0xff,0xff,0x7b]
+ vrangeps $0x7b,-8256(%rdx), %zmm21, %zmm24
+// CHECK: vrangeps $123, 508(%rdx){1to16}, %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x63,0x55,0x50,0x50,0x42,0x7f,0x7b]
+ vrangeps $0x7b,508(%rdx){1to16}, %zmm21, %zmm24
+// CHECK: vrangeps $123, 512(%rdx){1to16}, %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x63,0x55,0x50,0x50,0x82,0x00,0x02,0x00,0x00,0x7b]
+ vrangeps $0x7b,512(%rdx){1to16}, %zmm21, %zmm24
+// CHECK: vrangeps $123, -512(%rdx){1to16}, %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x63,0x55,0x50,0x50,0x42,0x80,0x7b]
+ vrangeps $0x7b,-512(%rdx){1to16}, %zmm21, %zmm24
+// CHECK: vrangeps $123, -516(%rdx){1to16}, %zmm21, %zmm24
+// CHECK: encoding: [0x62,0x63,0x55,0x50,0x50,0x82,0xfc,0xfd,0xff,0xff,0x7b]
+ vrangeps $0x7b,-516(%rdx){1to16}, %zmm21, %zmm24
+// CHECK: vrangesd $171, %xmm21, %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xa3,0xf5,0x00,0x51,0xcd,0xab]
+ vrangesd $0xab, %xmm21, %xmm17, %xmm17
+// CHECK: vrangesd $171, %xmm21, %xmm17, %xmm17 {%k5}
+// CHECK: encoding: [0x62,0xa3,0xf5,0x05,0x51,0xcd,0xab]
+ vrangesd $0xab, %xmm21, %xmm17, %xmm17 {%k5}
+// CHECK: vrangesd $171, %xmm21, %xmm17, %xmm17 {%k5} {z}
+// CHECK: encoding: [0x62,0xa3,0xf5,0x85,0x51,0xcd,0xab]
+ vrangesd $0xab, %xmm21, %xmm17, %xmm17 {%k5} {z}
+// CHECK: vrangesd $171,{sae}, %xmm21, %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xa3,0xf5,0x10,0x51,0xcd,0xab]
+ vrangesd $0xab,{sae}, %xmm21, %xmm17, %xmm17
+// CHECK: vrangesd $123, %xmm21, %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xa3,0xf5,0x00,0x51,0xcd,0x7b]
+ vrangesd $0x7b, %xmm21, %xmm17, %xmm17
+// CHECK: vrangesd $123,{sae}, %xmm21, %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xa3,0xf5,0x10,0x51,0xcd,0x7b]
+ vrangesd $0x7b,{sae}, %xmm21, %xmm17, %xmm17
+// CHECK: vrangesd $123, (%rcx), %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x51,0x09,0x7b]
+ vrangesd $0x7b,(%rcx), %xmm17, %xmm17
+// CHECK: vrangesd $123, 291(%rax,%r14,8), %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xa3,0xf5,0x00,0x51,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrangesd $0x7b,291(%rax,%r14,8), %xmm17, %xmm17
+// CHECK: vrangesd $123, 1016(%rdx), %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x51,0x4a,0x7f,0x7b]
+ vrangesd $0x7b,1016(%rdx), %xmm17, %xmm17
+// CHECK: vrangesd $123, 1024(%rdx), %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x51,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vrangesd $0x7b,1024(%rdx), %xmm17, %xmm17
+// CHECK: vrangesd $123, -1024(%rdx), %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x51,0x4a,0x80,0x7b]
+ vrangesd $0x7b,-1024(%rdx), %xmm17, %xmm17
+// CHECK: vrangesd $123, -1032(%rdx), %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x51,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vrangesd $0x7b,-1032(%rdx), %xmm17, %xmm17
+// CHECK: vrangess $171, %xmm20, %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x23,0x3d,0x00,0x51,0xcc,0xab]
+ vrangess $0xab, %xmm20, %xmm24, %xmm25
+// CHECK: vrangess $171, %xmm20, %xmm24, %xmm25 {%k5}
+// CHECK: encoding: [0x62,0x23,0x3d,0x05,0x51,0xcc,0xab]
+ vrangess $0xab, %xmm20, %xmm24, %xmm25 {%k5}
+// CHECK: vrangess $171, %xmm20, %xmm24, %xmm25 {%k5} {z}
+// CHECK: encoding: [0x62,0x23,0x3d,0x85,0x51,0xcc,0xab]
+ vrangess $0xab, %xmm20, %xmm24, %xmm25 {%k5} {z}
+// CHECK: vrangess $171,{sae}, %xmm20, %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x23,0x3d,0x10,0x51,0xcc,0xab]
+ vrangess $0xab,{sae}, %xmm20, %xmm24, %xmm25
+// CHECK: vrangess $123, %xmm20, %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x23,0x3d,0x00,0x51,0xcc,0x7b]
+ vrangess $0x7b, %xmm20, %xmm24, %xmm25
+// CHECK: vrangess $123,{sae}, %xmm20, %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x23,0x3d,0x10,0x51,0xcc,0x7b]
+ vrangess $0x7b,{sae}, %xmm20, %xmm24, %xmm25
+// CHECK: vrangess $123, (%rcx), %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x51,0x09,0x7b]
+ vrangess $0x7b,(%rcx), %xmm24, %xmm25
+// CHECK: vrangess $123, 291(%rax,%r14,8), %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x23,0x3d,0x00,0x51,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrangess $0x7b,291(%rax,%r14,8), %xmm24, %xmm25
+// CHECK: vrangess $123, 508(%rdx), %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x51,0x4a,0x7f,0x7b]
+ vrangess $0x7b,508(%rdx), %xmm24, %xmm25
+// CHECK: vrangess $123, 512(%rdx), %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x51,0x8a,0x00,0x02,0x00,0x00,0x7b]
+ vrangess $0x7b,512(%rdx), %xmm24, %xmm25
+// CHECK: vrangess $123, -512(%rdx), %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x51,0x4a,0x80,0x7b]
+ vrangess $0x7b,-512(%rdx), %xmm24, %xmm25
+// CHECK: vrangess $123, -516(%rdx), %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x51,0x8a,0xfc,0xfd,0xff,0xff,0x7b]
+ vrangess $0x7b,-516(%rdx), %xmm24, %xmm25
diff --git a/test/MC/X86/x86-64-avx512dq_vl.s b/test/MC/X86/x86-64-avx512dq_vl.s
index 2de4564..d14ae6e 100644
--- a/test/MC/X86/x86-64-avx512dq_vl.s
+++ b/test/MC/X86/x86-64-avx512dq_vl.s
@@ -1968,3 +1968,242 @@
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x5a,0x92,0xf0,0xf7,0xff,0xff]
vbroadcasti64x2 -2064(%rdx), %ymm18
+// CHECK: vrangepd $171, %xmm26, %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x03,0xa5,0x00,0x50,0xca,0xab]
+ vrangepd $0xab, %xmm26, %xmm27, %xmm25
+// CHECK: vrangepd $171, %xmm26, %xmm27, %xmm25 {%k6}
+// CHECK: encoding: [0x62,0x03,0xa5,0x06,0x50,0xca,0xab]
+ vrangepd $0xab, %xmm26, %xmm27, %xmm25 {%k6}
+// CHECK: vrangepd $171, %xmm26, %xmm27, %xmm25 {%k6} {z}
+// CHECK: encoding: [0x62,0x03,0xa5,0x86,0x50,0xca,0xab]
+ vrangepd $0xab, %xmm26, %xmm27, %xmm25 {%k6} {z}
+// CHECK: vrangepd $123, %xmm26, %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x03,0xa5,0x00,0x50,0xca,0x7b]
+ vrangepd $0x7b, %xmm26, %xmm27, %xmm25
+// CHECK: vrangepd $123, (%rcx), %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x63,0xa5,0x00,0x50,0x09,0x7b]
+ vrangepd $0x7b,(%rcx), %xmm27, %xmm25
+// CHECK: vrangepd $123, 291(%rax,%r14,8), %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x23,0xa5,0x00,0x50,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrangepd $0x7b,291(%rax,%r14,8), %xmm27, %xmm25
+// CHECK: vrangepd $123, (%rcx){1to2}, %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x63,0xa5,0x10,0x50,0x09,0x7b]
+ vrangepd $0x7b,(%rcx){1to2}, %xmm27, %xmm25
+// CHECK: vrangepd $123, 2032(%rdx), %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x63,0xa5,0x00,0x50,0x4a,0x7f,0x7b]
+ vrangepd $0x7b,2032(%rdx), %xmm27, %xmm25
+// CHECK: vrangepd $123, 2048(%rdx), %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x63,0xa5,0x00,0x50,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vrangepd $0x7b,2048(%rdx), %xmm27, %xmm25
+// CHECK: vrangepd $123, -2048(%rdx), %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x63,0xa5,0x00,0x50,0x4a,0x80,0x7b]
+ vrangepd $0x7b,-2048(%rdx), %xmm27, %xmm25
+// CHECK: vrangepd $123, -2064(%rdx), %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x63,0xa5,0x00,0x50,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vrangepd $0x7b,-2064(%rdx), %xmm27, %xmm25
+// CHECK: vrangepd $123, 1016(%rdx){1to2}, %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x63,0xa5,0x10,0x50,0x4a,0x7f,0x7b]
+ vrangepd $0x7b,1016(%rdx){1to2}, %xmm27, %xmm25
+// CHECK: vrangepd $123, 1024(%rdx){1to2}, %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x63,0xa5,0x10,0x50,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vrangepd $0x7b,1024(%rdx){1to2}, %xmm27, %xmm25
+// CHECK: vrangepd $123, -1024(%rdx){1to2}, %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x63,0xa5,0x10,0x50,0x4a,0x80,0x7b]
+ vrangepd $0x7b,-1024(%rdx){1to2}, %xmm27, %xmm25
+// CHECK: vrangepd $123, -1032(%rdx){1to2}, %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x63,0xa5,0x10,0x50,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vrangepd $0x7b,-1032(%rdx){1to2}, %xmm27, %xmm25
+// CHECK: vrangepd $171, %ymm28, %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x03,0xa5,0x20,0x50,0xec,0xab]
+ vrangepd $0xab, %ymm28, %ymm27, %ymm29
+// CHECK: vrangepd $171, %ymm28, %ymm27, %ymm29 {%k7}
+// CHECK: encoding: [0x62,0x03,0xa5,0x27,0x50,0xec,0xab]
+ vrangepd $0xab, %ymm28, %ymm27, %ymm29 {%k7}
+// CHECK: vrangepd $171, %ymm28, %ymm27, %ymm29 {%k7} {z}
+// CHECK: encoding: [0x62,0x03,0xa5,0xa7,0x50,0xec,0xab]
+ vrangepd $0xab, %ymm28, %ymm27, %ymm29 {%k7} {z}
+// CHECK: vrangepd $123, %ymm28, %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x03,0xa5,0x20,0x50,0xec,0x7b]
+ vrangepd $0x7b, %ymm28, %ymm27, %ymm29
+// CHECK: vrangepd $123, (%rcx), %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0xa5,0x20,0x50,0x29,0x7b]
+ vrangepd $0x7b,(%rcx), %ymm27, %ymm29
+// CHECK: vrangepd $123, 291(%rax,%r14,8), %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x23,0xa5,0x20,0x50,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrangepd $0x7b,291(%rax,%r14,8), %ymm27, %ymm29
+// CHECK: vrangepd $123, (%rcx){1to4}, %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0xa5,0x30,0x50,0x29,0x7b]
+ vrangepd $0x7b,(%rcx){1to4}, %ymm27, %ymm29
+// CHECK: vrangepd $123, 4064(%rdx), %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0xa5,0x20,0x50,0x6a,0x7f,0x7b]
+ vrangepd $0x7b,4064(%rdx), %ymm27, %ymm29
+// CHECK: vrangepd $123, 4096(%rdx), %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0xa5,0x20,0x50,0xaa,0x00,0x10,0x00,0x00,0x7b]
+ vrangepd $0x7b,4096(%rdx), %ymm27, %ymm29
+// CHECK: vrangepd $123, -4096(%rdx), %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0xa5,0x20,0x50,0x6a,0x80,0x7b]
+ vrangepd $0x7b,-4096(%rdx), %ymm27, %ymm29
+// CHECK: vrangepd $123, -4128(%rdx), %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0xa5,0x20,0x50,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+ vrangepd $0x7b,-4128(%rdx), %ymm27, %ymm29
+// CHECK: vrangepd $123, 1016(%rdx){1to4}, %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0xa5,0x30,0x50,0x6a,0x7f,0x7b]
+ vrangepd $0x7b,1016(%rdx){1to4}, %ymm27, %ymm29
+// CHECK: vrangepd $123, 1024(%rdx){1to4}, %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0xa5,0x30,0x50,0xaa,0x00,0x04,0x00,0x00,0x7b]
+ vrangepd $0x7b,1024(%rdx){1to4}, %ymm27, %ymm29
+// CHECK: vrangepd $123, -1024(%rdx){1to4}, %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0xa5,0x30,0x50,0x6a,0x80,0x7b]
+ vrangepd $0x7b,-1024(%rdx){1to4}, %ymm27, %ymm29
+// CHECK: vrangepd $123, -1032(%rdx){1to4}, %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0xa5,0x30,0x50,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
+ vrangepd $0x7b,-1032(%rdx){1to4}, %ymm27, %ymm29
+// CHECK: vrangeps $171, %xmm24, %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x03,0x45,0x00,0x50,0xd8,0xab]
+ vrangeps $0xab, %xmm24, %xmm23, %xmm27
+// CHECK: vrangeps $171, %xmm24, %xmm23, %xmm27 {%k6}
+// CHECK: encoding: [0x62,0x03,0x45,0x06,0x50,0xd8,0xab]
+ vrangeps $0xab, %xmm24, %xmm23, %xmm27 {%k6}
+// CHECK: vrangeps $171, %xmm24, %xmm23, %xmm27 {%k6} {z}
+// CHECK: encoding: [0x62,0x03,0x45,0x86,0x50,0xd8,0xab]
+ vrangeps $0xab, %xmm24, %xmm23, %xmm27 {%k6} {z}
+// CHECK: vrangeps $123, %xmm24, %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x03,0x45,0x00,0x50,0xd8,0x7b]
+ vrangeps $0x7b, %xmm24, %xmm23, %xmm27
+// CHECK: vrangeps $123, (%rcx), %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x63,0x45,0x00,0x50,0x19,0x7b]
+ vrangeps $0x7b,(%rcx), %xmm23, %xmm27
+// CHECK: vrangeps $123, 291(%rax,%r14,8), %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x23,0x45,0x00,0x50,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrangeps $0x7b,291(%rax,%r14,8), %xmm23, %xmm27
+// CHECK: vrangeps $123, (%rcx){1to4}, %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x63,0x45,0x10,0x50,0x19,0x7b]
+ vrangeps $0x7b,(%rcx){1to4}, %xmm23, %xmm27
+// CHECK: vrangeps $123, 2032(%rdx), %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x63,0x45,0x00,0x50,0x5a,0x7f,0x7b]
+ vrangeps $0x7b,2032(%rdx), %xmm23, %xmm27
+// CHECK: vrangeps $123, 2048(%rdx), %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x63,0x45,0x00,0x50,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vrangeps $0x7b,2048(%rdx), %xmm23, %xmm27
+// CHECK: vrangeps $123, -2048(%rdx), %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x63,0x45,0x00,0x50,0x5a,0x80,0x7b]
+ vrangeps $0x7b,-2048(%rdx), %xmm23, %xmm27
+// CHECK: vrangeps $123, -2064(%rdx), %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x63,0x45,0x00,0x50,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vrangeps $0x7b,-2064(%rdx), %xmm23, %xmm27
+// CHECK: vrangeps $123, 508(%rdx){1to4}, %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x63,0x45,0x10,0x50,0x5a,0x7f,0x7b]
+ vrangeps $0x7b,508(%rdx){1to4}, %xmm23, %xmm27
+// CHECK: vrangeps $123, 512(%rdx){1to4}, %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x63,0x45,0x10,0x50,0x9a,0x00,0x02,0x00,0x00,0x7b]
+ vrangeps $0x7b,512(%rdx){1to4}, %xmm23, %xmm27
+// CHECK: vrangeps $123, -512(%rdx){1to4}, %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x63,0x45,0x10,0x50,0x5a,0x80,0x7b]
+ vrangeps $0x7b,-512(%rdx){1to4}, %xmm23, %xmm27
+// CHECK: vrangeps $123, -516(%rdx){1to4}, %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x63,0x45,0x10,0x50,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+ vrangeps $0x7b,-516(%rdx){1to4}, %xmm23, %xmm27
+// CHECK: vrangeps $171, %ymm21, %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x23,0x45,0x20,0x50,0xc5,0xab]
+ vrangeps $0xab, %ymm21, %ymm23, %ymm24
+// CHECK: vrangeps $171, %ymm21, %ymm23, %ymm24 {%k7}
+// CHECK: encoding: [0x62,0x23,0x45,0x27,0x50,0xc5,0xab]
+ vrangeps $0xab, %ymm21, %ymm23, %ymm24 {%k7}
+// CHECK: vrangeps $171, %ymm21, %ymm23, %ymm24 {%k7} {z}
+// CHECK: encoding: [0x62,0x23,0x45,0xa7,0x50,0xc5,0xab]
+ vrangeps $0xab, %ymm21, %ymm23, %ymm24 {%k7} {z}
+// CHECK: vrangeps $123, %ymm21, %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x23,0x45,0x20,0x50,0xc5,0x7b]
+ vrangeps $0x7b, %ymm21, %ymm23, %ymm24
+// CHECK: vrangeps $123, (%rcx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0x45,0x20,0x50,0x01,0x7b]
+ vrangeps $0x7b,(%rcx), %ymm23, %ymm24
+// CHECK: vrangeps $123, 291(%rax,%r14,8), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x23,0x45,0x20,0x50,0x84,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrangeps $0x7b,291(%rax,%r14,8), %ymm23, %ymm24
+// CHECK: vrangeps $123, (%rcx){1to8}, %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0x45,0x30,0x50,0x01,0x7b]
+ vrangeps $0x7b,(%rcx){1to8}, %ymm23, %ymm24
+// CHECK: vrangeps $123, 4064(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0x45,0x20,0x50,0x42,0x7f,0x7b]
+ vrangeps $0x7b,4064(%rdx), %ymm23, %ymm24
+// CHECK: vrangeps $123, 4096(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0x45,0x20,0x50,0x82,0x00,0x10,0x00,0x00,0x7b]
+ vrangeps $0x7b,4096(%rdx), %ymm23, %ymm24
+// CHECK: vrangeps $123, -4096(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0x45,0x20,0x50,0x42,0x80,0x7b]
+ vrangeps $0x7b,-4096(%rdx), %ymm23, %ymm24
+// CHECK: vrangeps $123, -4128(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0x45,0x20,0x50,0x82,0xe0,0xef,0xff,0xff,0x7b]
+ vrangeps $0x7b,-4128(%rdx), %ymm23, %ymm24
+// CHECK: vrangeps $123, 508(%rdx){1to8}, %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0x45,0x30,0x50,0x42,0x7f,0x7b]
+ vrangeps $0x7b,508(%rdx){1to8}, %ymm23, %ymm24
+// CHECK: vrangeps $123, 512(%rdx){1to8}, %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0x45,0x30,0x50,0x82,0x00,0x02,0x00,0x00,0x7b]
+ vrangeps $0x7b,512(%rdx){1to8}, %ymm23, %ymm24
+// CHECK: vrangeps $123, -512(%rdx){1to8}, %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0x45,0x30,0x50,0x42,0x80,0x7b]
+ vrangeps $0x7b,-512(%rdx){1to8}, %ymm23, %ymm24
+// CHECK: vrangeps $123, -516(%rdx){1to8}, %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0x45,0x30,0x50,0x82,0xfc,0xfd,0xff,0xff,0x7b]
+ vrangeps $0x7b,-516(%rdx){1to8}, %ymm23, %ymm24
diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s
index 5007726..f521b3e 100644
--- a/test/MC/X86/x86-64-avx512f_vl.s
+++ b/test/MC/X86/x86-64-avx512f_vl.s
@@ -10188,12 +10188,947 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x5a,0x9a,0xf0,0xf7,0xff,0xff]
vbroadcasti32x4 -2064(%rdx), %ymm19
+// CHECK: vfixupimmps $171, %xmm17, %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x23,0x75,0x00,0x54,0xc9,0xab]
+ vfixupimmps $0xab, %xmm17, %xmm17, %xmm25
+// CHECK: vfixupimmps $171, %xmm17, %xmm17, %xmm25 {%k3}
+// CHECK: encoding: [0x62,0x23,0x75,0x03,0x54,0xc9,0xab]
+ vfixupimmps $0xab, %xmm17, %xmm17, %xmm25 {%k3}
+// CHECK: vfixupimmps $171, %xmm17, %xmm17, %xmm25 {%k3} {z}
+// CHECK: encoding: [0x62,0x23,0x75,0x83,0x54,0xc9,0xab]
+ vfixupimmps $0xab, %xmm17, %xmm17, %xmm25 {%k3} {z}
+// CHECK: vfixupimmps $123, %xmm17, %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x23,0x75,0x00,0x54,0xc9,0x7b]
+ vfixupimmps $0x7b, %xmm17, %xmm17, %xmm25
+// CHECK: vfixupimmps $123, (%rcx), %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x63,0x75,0x00,0x54,0x09,0x7b]
+ vfixupimmps $0x7b, (%rcx), %xmm17, %xmm25
+// CHECK: vfixupimmps $123, 291(%rax,%r14,8), %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x23,0x75,0x00,0x54,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfixupimmps $0x7b, 291(%rax,%r14,8), %xmm17, %xmm25
+// CHECK: vfixupimmps $123, (%rcx){1to4}, %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x63,0x75,0x10,0x54,0x09,0x7b]
+ vfixupimmps $0x7b, (%rcx){1to4}, %xmm17, %xmm25
+// CHECK: vfixupimmps $123, 2032(%rdx), %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x63,0x75,0x00,0x54,0x4a,0x7f,0x7b]
+ vfixupimmps $0x7b, 2032(%rdx), %xmm17, %xmm25
+// CHECK: vfixupimmps $123, 2048(%rdx), %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x63,0x75,0x00,0x54,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vfixupimmps $0x7b, 2048(%rdx), %xmm17, %xmm25
+// CHECK: vfixupimmps $123, -2048(%rdx), %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x63,0x75,0x00,0x54,0x4a,0x80,0x7b]
+ vfixupimmps $0x7b, -2048(%rdx), %xmm17, %xmm25
+// CHECK: vfixupimmps $123, -2064(%rdx), %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x63,0x75,0x00,0x54,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vfixupimmps $0x7b, -2064(%rdx), %xmm17, %xmm25
+// CHECK: vfixupimmps $123, 508(%rdx){1to4}, %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x63,0x75,0x10,0x54,0x4a,0x7f,0x7b]
+ vfixupimmps $0x7b, 508(%rdx){1to4}, %xmm17, %xmm25
+// CHECK: vfixupimmps $123, 512(%rdx){1to4}, %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x63,0x75,0x10,0x54,0x8a,0x00,0x02,0x00,0x00,0x7b]
+ vfixupimmps $0x7b, 512(%rdx){1to4}, %xmm17, %xmm25
+// CHECK: vfixupimmps $123, -512(%rdx){1to4}, %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x63,0x75,0x10,0x54,0x4a,0x80,0x7b]
+ vfixupimmps $0x7b, -512(%rdx){1to4}, %xmm17, %xmm25
+// CHECK: vfixupimmps $123, -516(%rdx){1to4}, %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x63,0x75,0x10,0x54,0x8a,0xfc,0xfd,0xff,0xff,0x7b]
+ vfixupimmps $0x7b, -516(%rdx){1to4}, %xmm17, %xmm25
+// CHECK: vfixupimmps $171, %ymm28, %ymm21, %ymm30
+// CHECK: encoding: [0x62,0x03,0x55,0x20,0x54,0xf4,0xab]
+ vfixupimmps $0xab, %ymm28, %ymm21, %ymm30
+// CHECK: vfixupimmps $171, %ymm28, %ymm21, %ymm30 {%k4}
+// CHECK: encoding: [0x62,0x03,0x55,0x24,0x54,0xf4,0xab]
+ vfixupimmps $0xab, %ymm28, %ymm21, %ymm30 {%k4}
+// CHECK: vfixupimmps $171, %ymm28, %ymm21, %ymm30 {%k4} {z}
+// CHECK: encoding: [0x62,0x03,0x55,0xa4,0x54,0xf4,0xab]
+ vfixupimmps $0xab, %ymm28, %ymm21, %ymm30 {%k4} {z}
+// CHECK: vfixupimmps $123, %ymm28, %ymm21, %ymm30
+// CHECK: encoding: [0x62,0x03,0x55,0x20,0x54,0xf4,0x7b]
+ vfixupimmps $0x7b, %ymm28, %ymm21, %ymm30
+// CHECK: vfixupimmps $123, (%rcx), %ymm21, %ymm30
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x54,0x31,0x7b]
+ vfixupimmps $0x7b, (%rcx), %ymm21, %ymm30
+// CHECK: vfixupimmps $123, 291(%rax,%r14,8), %ymm21, %ymm30
+// CHECK: encoding: [0x62,0x23,0x55,0x20,0x54,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfixupimmps $0x7b, 291(%rax,%r14,8), %ymm21, %ymm30
+// CHECK: vfixupimmps $123, (%rcx){1to8}, %ymm21, %ymm30
+// CHECK: encoding: [0x62,0x63,0x55,0x30,0x54,0x31,0x7b]
+ vfixupimmps $0x7b, (%rcx){1to8}, %ymm21, %ymm30
+// CHECK: vfixupimmps $123, 4064(%rdx), %ymm21, %ymm30
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x54,0x72,0x7f,0x7b]
+ vfixupimmps $0x7b, 4064(%rdx), %ymm21, %ymm30
+// CHECK: vfixupimmps $123, 4096(%rdx), %ymm21, %ymm30
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x54,0xb2,0x00,0x10,0x00,0x00,0x7b]
+ vfixupimmps $0x7b, 4096(%rdx), %ymm21, %ymm30
+// CHECK: vfixupimmps $123, -4096(%rdx), %ymm21, %ymm30
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x54,0x72,0x80,0x7b]
+ vfixupimmps $0x7b, -4096(%rdx), %ymm21, %ymm30
+// CHECK: vfixupimmps $123, -4128(%rdx), %ymm21, %ymm30
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x54,0xb2,0xe0,0xef,0xff,0xff,0x7b]
+ vfixupimmps $0x7b, -4128(%rdx), %ymm21, %ymm30
+// CHECK: vfixupimmps $123, 508(%rdx){1to8}, %ymm21, %ymm30
+// CHECK: encoding: [0x62,0x63,0x55,0x30,0x54,0x72,0x7f,0x7b]
+ vfixupimmps $0x7b, 508(%rdx){1to8}, %ymm21, %ymm30
+// CHECK: vfixupimmps $123, 512(%rdx){1to8}, %ymm21, %ymm30
+// CHECK: encoding: [0x62,0x63,0x55,0x30,0x54,0xb2,0x00,0x02,0x00,0x00,0x7b]
+ vfixupimmps $0x7b, 512(%rdx){1to8}, %ymm21, %ymm30
+// CHECK: vfixupimmps $123, -512(%rdx){1to8}, %ymm21, %ymm30
+// CHECK: encoding: [0x62,0x63,0x55,0x30,0x54,0x72,0x80,0x7b]
+ vfixupimmps $0x7b, -512(%rdx){1to8}, %ymm21, %ymm30
+// CHECK: vfixupimmps $123, -516(%rdx){1to8}, %ymm21, %ymm30
+// CHECK: encoding: [0x62,0x63,0x55,0x30,0x54,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
+ vfixupimmps $0x7b, -516(%rdx){1to8}, %ymm21, %ymm30
+// CHECK: vfixupimmpd $171, %xmm25, %xmm18, %xmm24
+// CHECK: encoding: [0x62,0x03,0xed,0x00,0x54,0xc1,0xab]
+ vfixupimmpd $0xab, %xmm25, %xmm18, %xmm24
+// CHECK: vfixupimmpd $171, %xmm25, %xmm18, %xmm24 {%k3}
+// CHECK: encoding: [0x62,0x03,0xed,0x03,0x54,0xc1,0xab]
+ vfixupimmpd $0xab, %xmm25, %xmm18, %xmm24 {%k3}
+// CHECK: vfixupimmpd $171, %xmm25, %xmm18, %xmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x03,0xed,0x83,0x54,0xc1,0xab]
+ vfixupimmpd $0xab, %xmm25, %xmm18, %xmm24 {%k3} {z}
+// CHECK: vfixupimmpd $123, %xmm25, %xmm18, %xmm24
+// CHECK: encoding: [0x62,0x03,0xed,0x00,0x54,0xc1,0x7b]
+ vfixupimmpd $0x7b, %xmm25, %xmm18, %xmm24
+// CHECK: vfixupimmpd $123, (%rcx), %xmm18, %xmm24
+// CHECK: encoding: [0x62,0x63,0xed,0x00,0x54,0x01,0x7b]
+ vfixupimmpd $0x7b, (%rcx), %xmm18, %xmm24
+// CHECK: vfixupimmpd $123, 291(%rax,%r14,8), %xmm18, %xmm24
+// CHECK: encoding: [0x62,0x23,0xed,0x00,0x54,0x84,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfixupimmpd $0x7b, 291(%rax,%r14,8), %xmm18, %xmm24
+// CHECK: vfixupimmpd $123, (%rcx){1to2}, %xmm18, %xmm24
+// CHECK: encoding: [0x62,0x63,0xed,0x10,0x54,0x01,0x7b]
+ vfixupimmpd $0x7b, (%rcx){1to2}, %xmm18, %xmm24
+// CHECK: vfixupimmpd $123, 2032(%rdx), %xmm18, %xmm24
+// CHECK: encoding: [0x62,0x63,0xed,0x00,0x54,0x42,0x7f,0x7b]
+ vfixupimmpd $0x7b, 2032(%rdx), %xmm18, %xmm24
+// CHECK: vfixupimmpd $123, 2048(%rdx), %xmm18, %xmm24
+// CHECK: encoding: [0x62,0x63,0xed,0x00,0x54,0x82,0x00,0x08,0x00,0x00,0x7b]
+ vfixupimmpd $0x7b, 2048(%rdx), %xmm18, %xmm24
+// CHECK: vfixupimmpd $123, -2048(%rdx), %xmm18, %xmm24
+// CHECK: encoding: [0x62,0x63,0xed,0x00,0x54,0x42,0x80,0x7b]
+ vfixupimmpd $0x7b, -2048(%rdx), %xmm18, %xmm24
+// CHECK: vfixupimmpd $123, -2064(%rdx), %xmm18, %xmm24
+// CHECK: encoding: [0x62,0x63,0xed,0x00,0x54,0x82,0xf0,0xf7,0xff,0xff,0x7b]
+ vfixupimmpd $0x7b, -2064(%rdx), %xmm18, %xmm24
+// CHECK: vfixupimmpd $123, 1016(%rdx){1to2}, %xmm18, %xmm24
+// CHECK: encoding: [0x62,0x63,0xed,0x10,0x54,0x42,0x7f,0x7b]
+ vfixupimmpd $0x7b, 1016(%rdx){1to2}, %xmm18, %xmm24
+// CHECK: vfixupimmpd $123, 1024(%rdx){1to2}, %xmm18, %xmm24
+// CHECK: encoding: [0x62,0x63,0xed,0x10,0x54,0x82,0x00,0x04,0x00,0x00,0x7b]
+ vfixupimmpd $0x7b, 1024(%rdx){1to2}, %xmm18, %xmm24
+// CHECK: vfixupimmpd $123, -1024(%rdx){1to2}, %xmm18, %xmm24
+// CHECK: encoding: [0x62,0x63,0xed,0x10,0x54,0x42,0x80,0x7b]
+ vfixupimmpd $0x7b, -1024(%rdx){1to2}, %xmm18, %xmm24
+// CHECK: vfixupimmpd $123, -1032(%rdx){1to2}, %xmm18, %xmm24
+// CHECK: encoding: [0x62,0x63,0xed,0x10,0x54,0x82,0xf8,0xfb,0xff,0xff,0x7b]
+ vfixupimmpd $0x7b, -1032(%rdx){1to2}, %xmm18, %xmm24
+// CHECK: vfixupimmpd $171, %ymm28, %ymm22, %ymm18
+// CHECK: encoding: [0x62,0x83,0xcd,0x20,0x54,0xd4,0xab]
+ vfixupimmpd $0xab, %ymm28, %ymm22, %ymm18
+// CHECK: vfixupimmpd $171, %ymm28, %ymm22, %ymm18 {%k2}
+// CHECK: encoding: [0x62,0x83,0xcd,0x22,0x54,0xd4,0xab]
+ vfixupimmpd $0xab, %ymm28, %ymm22, %ymm18 {%k2}
+// CHECK: vfixupimmpd $171, %ymm28, %ymm22, %ymm18 {%k2} {z}
+// CHECK: encoding: [0x62,0x83,0xcd,0xa2,0x54,0xd4,0xab]
+ vfixupimmpd $0xab, %ymm28, %ymm22, %ymm18 {%k2} {z}
+// CHECK: vfixupimmpd $123, %ymm28, %ymm22, %ymm18
+// CHECK: encoding: [0x62,0x83,0xcd,0x20,0x54,0xd4,0x7b]
+ vfixupimmpd $0x7b, %ymm28, %ymm22, %ymm18
+// CHECK: vfixupimmpd $123, (%rcx), %ymm22, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xcd,0x20,0x54,0x11,0x7b]
+ vfixupimmpd $0x7b, (%rcx), %ymm22, %ymm18
+// CHECK: vfixupimmpd $123, 291(%rax,%r14,8), %ymm22, %ymm18
+// CHECK: encoding: [0x62,0xa3,0xcd,0x20,0x54,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfixupimmpd $0x7b, 291(%rax,%r14,8), %ymm22, %ymm18
+// CHECK: vfixupimmpd $123, (%rcx){1to4}, %ymm22, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xcd,0x30,0x54,0x11,0x7b]
+ vfixupimmpd $0x7b, (%rcx){1to4}, %ymm22, %ymm18
+// CHECK: vfixupimmpd $123, 4064(%rdx), %ymm22, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xcd,0x20,0x54,0x52,0x7f,0x7b]
+ vfixupimmpd $0x7b, 4064(%rdx), %ymm22, %ymm18
+// CHECK: vfixupimmpd $123, 4096(%rdx), %ymm22, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xcd,0x20,0x54,0x92,0x00,0x10,0x00,0x00,0x7b]
+ vfixupimmpd $0x7b, 4096(%rdx), %ymm22, %ymm18
+// CHECK: vfixupimmpd $123, -4096(%rdx), %ymm22, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xcd,0x20,0x54,0x52,0x80,0x7b]
+ vfixupimmpd $0x7b, -4096(%rdx), %ymm22, %ymm18
+// CHECK: vfixupimmpd $123, -4128(%rdx), %ymm22, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xcd,0x20,0x54,0x92,0xe0,0xef,0xff,0xff,0x7b]
+ vfixupimmpd $0x7b, -4128(%rdx), %ymm22, %ymm18
+// CHECK: vfixupimmpd $123, 1016(%rdx){1to4}, %ymm22, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xcd,0x30,0x54,0x52,0x7f,0x7b]
+ vfixupimmpd $0x7b, 1016(%rdx){1to4}, %ymm22, %ymm18
+// CHECK: vfixupimmpd $123, 1024(%rdx){1to4}, %ymm22, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xcd,0x30,0x54,0x92,0x00,0x04,0x00,0x00,0x7b]
+ vfixupimmpd $0x7b, 1024(%rdx){1to4}, %ymm22, %ymm18
+// CHECK: vfixupimmpd $123, -1024(%rdx){1to4}, %ymm22, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xcd,0x30,0x54,0x52,0x80,0x7b]
+ vfixupimmpd $0x7b, -1024(%rdx){1to4}, %ymm22, %ymm18
+// CHECK: vfixupimmpd $123, -1032(%rdx){1to4}, %ymm22, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xcd,0x30,0x54,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+ vfixupimmpd $0x7b, -1032(%rdx){1to4}, %ymm22, %ymm18
+// CHECK: vpshufd $171, %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x70,0xcf,0xab]
+ vpshufd $171, %xmm23, %xmm17
+// CHECK: vpshufd $171, %xmm23, %xmm17 {%k1}
+// CHECK: encoding: [0x62,0xa1,0x7d,0x09,0x70,0xcf,0xab]
+ vpshufd $171, %xmm23, %xmm17 {%k1}
+// CHECK: vpshufd $171, %xmm23, %xmm17 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0x7d,0x89,0x70,0xcf,0xab]
+ vpshufd $171, %xmm23, %xmm17 {%k1} {z}
+// CHECK: vpshufd $123, %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x70,0xcf,0x7b]
+ vpshufd $123, %xmm23, %xmm17
+// CHECK: vpshufd $123, (%rcx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x70,0x09,0x7b]
+ vpshufd $123, (%rcx), %xmm17
+// CHECK: vpshufd $123, 291(%rax,%r14,8), %xmm17
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x70,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpshufd $123, 291(%rax,%r14,8), %xmm17
+// CHECK: vpshufd $123, 2032(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x70,0x4a,0x7f,0x7b]
+ vpshufd $123, 2032(%rdx), %xmm17
+// CHECK: vpshufd $123, (%rcx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x7d,0x18,0x70,0x09,0x7b]
+ vpshufd $123, (%rcx){1to4}, %xmm17
+// CHECK: vpshufd $123, 2048(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x70,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vpshufd $123, 2048(%rdx), %xmm17
+// CHECK: vpshufd $123, -2048(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x70,0x4a,0x80,0x7b]
+ vpshufd $123, -2048(%rdx), %xmm17
+// CHECK: vpshufd $123, -2064(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x70,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vpshufd $123, -2064(%rdx), %xmm17
+// CHECK: vpshufd $123, 508(%rdx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x7d,0x18,0x70,0x4a,0x7f,0x7b]
+ vpshufd $123, 508(%rdx){1to4}, %xmm17
+// CHECK: vpshufd $123, 512(%rdx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x7d,0x18,0x70,0x8a,0x00,0x02,0x00,0x00,0x7b]
+ vpshufd $123, 512(%rdx){1to4}, %xmm17
+// CHECK: vpshufd $123, -512(%rdx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x7d,0x18,0x70,0x4a,0x80,0x7b]
+ vpshufd $123, -512(%rdx){1to4}, %xmm17
+// CHECK: vpshufd $123, -516(%rdx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x7d,0x18,0x70,0x8a,0xfc,0xfd,0xff,0xff,0x7b]
+ vpshufd $123, -516(%rdx){1to4}, %xmm17
+// CHECK: vpshufd $171, %ymm22, %ymm20
+// CHECK: encoding: [0x62,0xa1,0x7d,0x28,0x70,0xe6,0xab]
+ vpshufd $171, %ymm22, %ymm20
+// CHECK: vpshufd $171, %ymm22, %ymm20 {%k2}
+// CHECK: encoding: [0x62,0xa1,0x7d,0x2a,0x70,0xe6,0xab]
+ vpshufd $171, %ymm22, %ymm20 {%k2}
+// CHECK: vpshufd $171, %ymm22, %ymm20 {%k2} {z}
+// CHECK: encoding: [0x62,0xa1,0x7d,0xaa,0x70,0xe6,0xab]
+ vpshufd $171, %ymm22, %ymm20 {%k2} {z}
+// CHECK: vpshufd $123, %ymm22, %ymm20
+// CHECK: encoding: [0x62,0xa1,0x7d,0x28,0x70,0xe6,0x7b]
+ vpshufd $123, %ymm22, %ymm20
+// CHECK: vpshufd $123, (%rcx), %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0x70,0x21,0x7b]
+ vpshufd $123, (%rcx), %ymm20
+// CHECK: vpshufd $123, 291(%rax,%r14,8), %ymm20
+// CHECK: encoding: [0x62,0xa1,0x7d,0x28,0x70,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpshufd $123, 291(%rax,%r14,8), %ymm20
+// CHECK: vpshufd $123, (%rcx){1to8}, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7d,0x38,0x70,0x21,0x7b]
+ vpshufd $123, (%rcx){1to8}, %ymm20
+// CHECK: vpshufd $123, 4064(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0x70,0x62,0x7f,0x7b]
+ vpshufd $123, 4064(%rdx), %ymm20
+// CHECK: vpshufd $123, 4096(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0x70,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vpshufd $123, 4096(%rdx), %ymm20
+// CHECK: vpshufd $123, -4096(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0x70,0x62,0x80,0x7b]
+ vpshufd $123, -4096(%rdx), %ymm20
+// CHECK: vpshufd $123, -4128(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0x70,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vpshufd $123, -4128(%rdx), %ymm20
+// CHECK: vpshufd $123, 508(%rdx){1to8}, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7d,0x38,0x70,0x62,0x7f,0x7b]
+ vpshufd $123, 508(%rdx){1to8}, %ymm20
+// CHECK: vpshufd $123, 512(%rdx){1to8}, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7d,0x38,0x70,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vpshufd $123, 512(%rdx){1to8}, %ymm20
+// CHECK: vpshufd $123, -512(%rdx){1to8}, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7d,0x38,0x70,0x62,0x80,0x7b]
+ vpshufd $123, -512(%rdx){1to8}, %ymm20
+// CHECK: vpshufd $123, -516(%rdx){1to8}, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7d,0x38,0x70,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vpshufd $123, -516(%rdx){1to8}, %ymm20
+// CHECK: vgetexppd %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x42,0xca]
+ vgetexppd %xmm18, %xmm17
+// CHECK: vgetexppd %xmm18, %xmm17 {%k1}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x09,0x42,0xca]
+ vgetexppd %xmm18, %xmm17 {%k1}
+// CHECK: vgetexppd %xmm18, %xmm17 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x89,0x42,0xca]
+ vgetexppd %xmm18, %xmm17 {%k1} {z}
+// CHECK: vgetexppd (%rcx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x09]
+ vgetexppd (%rcx), %xmm17
+// CHECK: vgetexppd 291(%rax,%r14,8), %xmm17
+// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x42,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vgetexppd 291(%rax,%r14,8), %xmm17
+// CHECK: vgetexppd (%rcx){1to2}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x09]
+ vgetexppd (%rcx){1to2}, %xmm17
+// CHECK: vgetexppd 2032(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x4a,0x7f]
+ vgetexppd 2032(%rdx), %xmm17
+// CHECK: vgetexppd 2048(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x8a,0x00,0x08,0x00,0x00]
+ vgetexppd 2048(%rdx), %xmm17
+// CHECK: vgetexppd -2048(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x4a,0x80]
+ vgetexppd -2048(%rdx), %xmm17
+// CHECK: vgetexppd -2064(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x8a,0xf0,0xf7,0xff,0xff]
+ vgetexppd -2064(%rdx), %xmm17
+// CHECK: vgetexppd 1016(%rdx){1to2}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x4a,0x7f]
+ vgetexppd 1016(%rdx){1to2}, %xmm17
+// CHECK: vgetexppd 1024(%rdx){1to2}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x8a,0x00,0x04,0x00,0x00]
+ vgetexppd 1024(%rdx){1to2}, %xmm17
+// CHECK: vgetexppd -1024(%rdx){1to2}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x4a,0x80]
+ vgetexppd -1024(%rdx){1to2}, %xmm17
+// CHECK: vgetexppd -1032(%rdx){1to2}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x8a,0xf8,0xfb,0xff,0xff]
+ vgetexppd -1032(%rdx){1to2}, %xmm17
+// CHECK: vgetexppd %ymm17, %ymm20
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x42,0xe1]
+ vgetexppd %ymm17, %ymm20
+// CHECK: vgetexppd %ymm17, %ymm20 {%k3}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2b,0x42,0xe1]
+ vgetexppd %ymm17, %ymm20 {%k3}
+// CHECK: vgetexppd %ymm17, %ymm20 {%k3} {z}
+// CHECK: encoding: [0x62,0xa2,0xfd,0xab,0x42,0xe1]
+ vgetexppd %ymm17, %ymm20 {%k3} {z}
+// CHECK: vgetexppd (%rcx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0x21]
+ vgetexppd (%rcx), %ymm20
+// CHECK: vgetexppd 291(%rax,%r14,8), %ymm20
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x42,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vgetexppd 291(%rax,%r14,8), %ymm20
+// CHECK: vgetexppd (%rcx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0x21]
+ vgetexppd (%rcx){1to4}, %ymm20
+// CHECK: vgetexppd 4064(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0x62,0x7f]
+ vgetexppd 4064(%rdx), %ymm20
+// CHECK: vgetexppd 4096(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0xa2,0x00,0x10,0x00,0x00]
+ vgetexppd 4096(%rdx), %ymm20
+// CHECK: vgetexppd -4096(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0x62,0x80]
+ vgetexppd -4096(%rdx), %ymm20
+// CHECK: vgetexppd -4128(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0xa2,0xe0,0xef,0xff,0xff]
+ vgetexppd -4128(%rdx), %ymm20
+// CHECK: vgetexppd 1016(%rdx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0x62,0x7f]
+ vgetexppd 1016(%rdx){1to4}, %ymm20
+// CHECK: vgetexppd 1024(%rdx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0xa2,0x00,0x04,0x00,0x00]
+ vgetexppd 1024(%rdx){1to4}, %ymm20
+// CHECK: vgetexppd -1024(%rdx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0x62,0x80]
+ vgetexppd -1024(%rdx){1to4}, %ymm20
+// CHECK: vgetexppd -1032(%rdx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0xa2,0xf8,0xfb,0xff,0xff]
+ vgetexppd -1032(%rdx){1to4}, %ymm20
+// CHECK: vgetexpps %xmm27, %xmm17
+// CHECK: encoding: [0x62,0x82,0x7d,0x08,0x42,0xcb]
+ vgetexpps %xmm27, %xmm17
+// CHECK: vgetexpps %xmm27, %xmm17 {%k2}
+// CHECK: encoding: [0x62,0x82,0x7d,0x0a,0x42,0xcb]
+ vgetexpps %xmm27, %xmm17 {%k2}
+// CHECK: vgetexpps %xmm27, %xmm17 {%k2} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0x8a,0x42,0xcb]
+ vgetexpps %xmm27, %xmm17 {%k2} {z}
+// CHECK: vgetexpps (%rcx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x09]
+ vgetexpps (%rcx), %xmm17
+// CHECK: vgetexpps 291(%rax,%r14,8), %xmm17
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x42,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vgetexpps 291(%rax,%r14,8), %xmm17
+// CHECK: vgetexpps (%rcx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x09]
+ vgetexpps (%rcx){1to4}, %xmm17
+// CHECK: vgetexpps 2032(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x4a,0x7f]
+ vgetexpps 2032(%rdx), %xmm17
+// CHECK: vgetexpps 2048(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x8a,0x00,0x08,0x00,0x00]
+ vgetexpps 2048(%rdx), %xmm17
+// CHECK: vgetexpps -2048(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x4a,0x80]
+ vgetexpps -2048(%rdx), %xmm17
+// CHECK: vgetexpps -2064(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x8a,0xf0,0xf7,0xff,0xff]
+ vgetexpps -2064(%rdx), %xmm17
+// CHECK: vgetexpps 508(%rdx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x4a,0x7f]
+ vgetexpps 508(%rdx){1to4}, %xmm17
+// CHECK: vgetexpps 512(%rdx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x8a,0x00,0x02,0x00,0x00]
+ vgetexpps 512(%rdx){1to4}, %xmm17
+// CHECK: vgetexpps -512(%rdx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x4a,0x80]
+ vgetexpps -512(%rdx){1to4}, %xmm17
+// CHECK: vgetexpps -516(%rdx){1to4}, %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x8a,0xfc,0xfd,0xff,0xff]
+ vgetexpps -516(%rdx){1to4}, %xmm17
+// CHECK: vgetexpps %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x42,0xf5]
+ vgetexpps %ymm29, %ymm30
+// CHECK: vgetexpps %ymm29, %ymm30 {%k6}
+// CHECK: encoding: [0x62,0x02,0x7d,0x2e,0x42,0xf5]
+ vgetexpps %ymm29, %ymm30 {%k6}
+// CHECK: vgetexpps %ymm29, %ymm30 {%k6} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0xae,0x42,0xf5]
+ vgetexpps %ymm29, %ymm30 {%k6} {z}
+// CHECK: vgetexpps (%rcx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0x31]
+ vgetexpps (%rcx), %ymm30
+// CHECK: vgetexpps 291(%rax,%r14,8), %ymm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x42,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vgetexpps 291(%rax,%r14,8), %ymm30
+// CHECK: vgetexpps (%rcx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0x31]
+ vgetexpps (%rcx){1to8}, %ymm30
+// CHECK: vgetexpps 4064(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0x72,0x7f]
+ vgetexpps 4064(%rdx), %ymm30
+// CHECK: vgetexpps 4096(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0xb2,0x00,0x10,0x00,0x00]
+ vgetexpps 4096(%rdx), %ymm30
+// CHECK: vgetexpps -4096(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0x72,0x80]
+ vgetexpps -4096(%rdx), %ymm30
+// CHECK: vgetexpps -4128(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0xb2,0xe0,0xef,0xff,0xff]
+ vgetexpps -4128(%rdx), %ymm30
+// CHECK: vgetexpps 508(%rdx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0x72,0x7f]
+ vgetexpps 508(%rdx){1to8}, %ymm30
+// CHECK: vgetexpps 512(%rdx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0xb2,0x00,0x02,0x00,0x00]
+ vgetexpps 512(%rdx){1to8}, %ymm30
+// CHECK: vgetexpps -512(%rdx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0x72,0x80]
+ vgetexpps -512(%rdx){1to8}, %ymm30
+// CHECK: vgetexpps -516(%rdx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0xb2,0xfc,0xfd,0xff,0xff]
+ vgetexpps -516(%rdx){1to8}, %ymm30
+// CHECK: vshuff32x4 $171, %ymm18, %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x23,0x25,0x20,0x23,0xea,0xab]
+ vshuff32x4 $0xab, %ymm18, %ymm27, %ymm29
+// CHECK: vshuff32x4 $171, %ymm18, %ymm27, %ymm29 {%k7}
+// CHECK: encoding: [0x62,0x23,0x25,0x27,0x23,0xea,0xab]
+ vshuff32x4 $0xab, %ymm18, %ymm27, %ymm29 {%k7}
+// CHECK: vshuff32x4 $171, %ymm18, %ymm27, %ymm29 {%k7} {z}
+// CHECK: encoding: [0x62,0x23,0x25,0xa7,0x23,0xea,0xab]
+ vshuff32x4 $0xab, %ymm18, %ymm27, %ymm29 {%k7} {z}
+// CHECK: vshuff32x4 $123, %ymm18, %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x23,0x25,0x20,0x23,0xea,0x7b]
+ vshuff32x4 $0x7b, %ymm18, %ymm27, %ymm29
+// CHECK: vshuff32x4 $123, (%rcx), %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0x25,0x20,0x23,0x29,0x7b]
+ vshuff32x4 $0x7b, (%rcx), %ymm27, %ymm29
+// CHECK: vshuff32x4 $123, 291(%rax,%r14,8), %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x23,0x25,0x20,0x23,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vshuff32x4 $0x7b, 291(%rax,%r14,8), %ymm27, %ymm29
+// CHECK: vshuff32x4 $123, (%rcx){1to8}, %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0x25,0x30,0x23,0x29,0x7b]
+ vshuff32x4 $0x7b, (%rcx){1to8}, %ymm27, %ymm29
+// CHECK: vshuff32x4 $123, 4064(%rdx), %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0x25,0x20,0x23,0x6a,0x7f,0x7b]
+ vshuff32x4 $0x7b, 4064(%rdx), %ymm27, %ymm29
+// CHECK: vshuff32x4 $123, 4096(%rdx), %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0x25,0x20,0x23,0xaa,0x00,0x10,0x00,0x00,0x7b]
+ vshuff32x4 $0x7b, 4096(%rdx), %ymm27, %ymm29
+// CHECK: vshuff32x4 $123, -4096(%rdx), %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0x25,0x20,0x23,0x6a,0x80,0x7b]
+ vshuff32x4 $0x7b, -4096(%rdx), %ymm27, %ymm29
+// CHECK: vshuff32x4 $123, -4128(%rdx), %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0x25,0x20,0x23,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+ vshuff32x4 $0x7b, -4128(%rdx), %ymm27, %ymm29
+// CHECK: vshuff32x4 $123, 508(%rdx){1to8}, %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0x25,0x30,0x23,0x6a,0x7f,0x7b]
+ vshuff32x4 $0x7b, 508(%rdx){1to8}, %ymm27, %ymm29
+// CHECK: vshuff32x4 $123, 512(%rdx){1to8}, %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0x25,0x30,0x23,0xaa,0x00,0x02,0x00,0x00,0x7b]
+ vshuff32x4 $0x7b, 512(%rdx){1to8}, %ymm27, %ymm29
+// CHECK: vshuff32x4 $123, -512(%rdx){1to8}, %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0x25,0x30,0x23,0x6a,0x80,0x7b]
+ vshuff32x4 $0x7b, -512(%rdx){1to8}, %ymm27, %ymm29
+// CHECK: vshuff32x4 $123, -516(%rdx){1to8}, %ymm27, %ymm29
+// CHECK: encoding: [0x62,0x63,0x25,0x30,0x23,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
+ vshuff32x4 $0x7b, -516(%rdx){1to8}, %ymm27, %ymm29
+// CHECK: vshuff64x2 $171, %ymm20, %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xa3,0xed,0x20,0x23,0xd4,0xab]
+ vshuff64x2 $0xab, %ymm20, %ymm18, %ymm18
+// CHECK: vshuff64x2 $171, %ymm20, %ymm18, %ymm18 {%k5}
+// CHECK: encoding: [0x62,0xa3,0xed,0x25,0x23,0xd4,0xab]
+ vshuff64x2 $0xab, %ymm20, %ymm18, %ymm18 {%k5}
+// CHECK: vshuff64x2 $171, %ymm20, %ymm18, %ymm18 {%k5} {z}
+// CHECK: encoding: [0x62,0xa3,0xed,0xa5,0x23,0xd4,0xab]
+ vshuff64x2 $0xab, %ymm20, %ymm18, %ymm18 {%k5} {z}
+// CHECK: vshuff64x2 $123, %ymm20, %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xa3,0xed,0x20,0x23,0xd4,0x7b]
+ vshuff64x2 $0x7b, %ymm20, %ymm18, %ymm18
+// CHECK: vshuff64x2 $123, (%rcx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xed,0x20,0x23,0x11,0x7b]
+ vshuff64x2 $0x7b, (%rcx), %ymm18, %ymm18
+// CHECK: vshuff64x2 $123, 291(%rax,%r14,8), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xa3,0xed,0x20,0x23,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vshuff64x2 $0x7b, 291(%rax,%r14,8), %ymm18, %ymm18
+// CHECK: vshuff64x2 $123, (%rcx){1to4}, %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xed,0x30,0x23,0x11,0x7b]
+ vshuff64x2 $0x7b, (%rcx){1to4}, %ymm18, %ymm18
+// CHECK: vshuff64x2 $123, 4064(%rdx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xed,0x20,0x23,0x52,0x7f,0x7b]
+ vshuff64x2 $0x7b, 4064(%rdx), %ymm18, %ymm18
+// CHECK: vshuff64x2 $123, 4096(%rdx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xed,0x20,0x23,0x92,0x00,0x10,0x00,0x00,0x7b]
+ vshuff64x2 $0x7b, 4096(%rdx), %ymm18, %ymm18
+// CHECK: vshuff64x2 $123, -4096(%rdx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xed,0x20,0x23,0x52,0x80,0x7b]
+ vshuff64x2 $0x7b, -4096(%rdx), %ymm18, %ymm18
+// CHECK: vshuff64x2 $123, -4128(%rdx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xed,0x20,0x23,0x92,0xe0,0xef,0xff,0xff,0x7b]
+ vshuff64x2 $0x7b, -4128(%rdx), %ymm18, %ymm18
+// CHECK: vshuff64x2 $123, 1016(%rdx){1to4}, %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xed,0x30,0x23,0x52,0x7f,0x7b]
+ vshuff64x2 $0x7b, 1016(%rdx){1to4}, %ymm18, %ymm18
+// CHECK: vshuff64x2 $123, 1024(%rdx){1to4}, %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xed,0x30,0x23,0x92,0x00,0x04,0x00,0x00,0x7b]
+ vshuff64x2 $0x7b, 1024(%rdx){1to4}, %ymm18, %ymm18
+// CHECK: vshuff64x2 $123, -1024(%rdx){1to4}, %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xed,0x30,0x23,0x52,0x80,0x7b]
+ vshuff64x2 $0x7b, -1024(%rdx){1to4}, %ymm18, %ymm18
+// CHECK: vshuff64x2 $123, -1032(%rdx){1to4}, %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0xed,0x30,0x23,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+ vshuff64x2 $0x7b, -1032(%rdx){1to4}, %ymm18, %ymm18
+// CHECK: vshufi32x4 $171, %ymm17, %ymm27, %ymm18
+// CHECK: encoding: [0x62,0xa3,0x25,0x20,0x43,0xd1,0xab]
+ vshufi32x4 $0xab, %ymm17, %ymm27, %ymm18
+// CHECK: vshufi32x4 $171, %ymm17, %ymm27, %ymm18 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x25,0x27,0x43,0xd1,0xab]
+ vshufi32x4 $0xab, %ymm17, %ymm27, %ymm18 {%k7}
+// CHECK: vshufi32x4 $171, %ymm17, %ymm27, %ymm18 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x25,0xa7,0x43,0xd1,0xab]
+ vshufi32x4 $0xab, %ymm17, %ymm27, %ymm18 {%k7} {z}
+// CHECK: vshufi32x4 $123, %ymm17, %ymm27, %ymm18
+// CHECK: encoding: [0x62,0xa3,0x25,0x20,0x43,0xd1,0x7b]
+ vshufi32x4 $0x7b, %ymm17, %ymm27, %ymm18
+// CHECK: vshufi32x4 $123, (%rcx), %ymm27, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x25,0x20,0x43,0x11,0x7b]
+ vshufi32x4 $0x7b, (%rcx), %ymm27, %ymm18
+// CHECK: vshufi32x4 $123, 291(%rax,%r14,8), %ymm27, %ymm18
+// CHECK: encoding: [0x62,0xa3,0x25,0x20,0x43,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vshufi32x4 $0x7b, 291(%rax,%r14,8), %ymm27, %ymm18
+// CHECK: vshufi32x4 $123, (%rcx){1to8}, %ymm27, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x25,0x30,0x43,0x11,0x7b]
+ vshufi32x4 $0x7b, (%rcx){1to8}, %ymm27, %ymm18
+// CHECK: vshufi32x4 $123, 4064(%rdx), %ymm27, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x25,0x20,0x43,0x52,0x7f,0x7b]
+ vshufi32x4 $0x7b, 4064(%rdx), %ymm27, %ymm18
+// CHECK: vshufi32x4 $123, 4096(%rdx), %ymm27, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x25,0x20,0x43,0x92,0x00,0x10,0x00,0x00,0x7b]
+ vshufi32x4 $0x7b, 4096(%rdx), %ymm27, %ymm18
+// CHECK: vshufi32x4 $123, -4096(%rdx), %ymm27, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x25,0x20,0x43,0x52,0x80,0x7b]
+ vshufi32x4 $0x7b, -4096(%rdx), %ymm27, %ymm18
+// CHECK: vshufi32x4 $123, -4128(%rdx), %ymm27, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x25,0x20,0x43,0x92,0xe0,0xef,0xff,0xff,0x7b]
+ vshufi32x4 $0x7b, -4128(%rdx), %ymm27, %ymm18
+// CHECK: vshufi32x4 $123, 508(%rdx){1to8}, %ymm27, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x25,0x30,0x43,0x52,0x7f,0x7b]
+ vshufi32x4 $0x7b, 508(%rdx){1to8}, %ymm27, %ymm18
+// CHECK: vshufi32x4 $123, 512(%rdx){1to8}, %ymm27, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x25,0x30,0x43,0x92,0x00,0x02,0x00,0x00,0x7b]
+ vshufi32x4 $0x7b, 512(%rdx){1to8}, %ymm27, %ymm18
+// CHECK: vshufi32x4 $123, -512(%rdx){1to8}, %ymm27, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x25,0x30,0x43,0x52,0x80,0x7b]
+ vshufi32x4 $0x7b, -512(%rdx){1to8}, %ymm27, %ymm18
+// CHECK: vshufi32x4 $123, -516(%rdx){1to8}, %ymm27, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x25,0x30,0x43,0x92,0xfc,0xfd,0xff,0xff,0x7b]
+ vshufi32x4 $0x7b, -516(%rdx){1to8}, %ymm27, %ymm18
+// CHECK: vshufi64x2 $171, %ymm21, %ymm26, %ymm25
+// CHECK: encoding: [0x62,0x23,0xad,0x20,0x43,0xcd,0xab]
+ vshufi64x2 $0xab, %ymm21, %ymm26, %ymm25
+// CHECK: vshufi64x2 $171, %ymm21, %ymm26, %ymm25 {%k3}
+// CHECK: encoding: [0x62,0x23,0xad,0x23,0x43,0xcd,0xab]
+ vshufi64x2 $0xab, %ymm21, %ymm26, %ymm25 {%k3}
+// CHECK: vshufi64x2 $171, %ymm21, %ymm26, %ymm25 {%k3} {z}
+// CHECK: encoding: [0x62,0x23,0xad,0xa3,0x43,0xcd,0xab]
+ vshufi64x2 $0xab, %ymm21, %ymm26, %ymm25 {%k3} {z}
+// CHECK: vshufi64x2 $123, %ymm21, %ymm26, %ymm25
+// CHECK: encoding: [0x62,0x23,0xad,0x20,0x43,0xcd,0x7b]
+ vshufi64x2 $0x7b, %ymm21, %ymm26, %ymm25
+// CHECK: vshufi64x2 $123, (%rcx), %ymm26, %ymm25
+// CHECK: encoding: [0x62,0x63,0xad,0x20,0x43,0x09,0x7b]
+ vshufi64x2 $0x7b, (%rcx), %ymm26, %ymm25
+// CHECK: vshufi64x2 $123, 291(%rax,%r14,8), %ymm26, %ymm25
+// CHECK: encoding: [0x62,0x23,0xad,0x20,0x43,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vshufi64x2 $0x7b, 291(%rax,%r14,8), %ymm26, %ymm25
+// CHECK: vshufi64x2 $123, (%rcx){1to4}, %ymm26, %ymm25
+// CHECK: encoding: [0x62,0x63,0xad,0x30,0x43,0x09,0x7b]
+ vshufi64x2 $0x7b, (%rcx){1to4}, %ymm26, %ymm25
+// CHECK: vshufi64x2 $123, 4064(%rdx), %ymm26, %ymm25
+// CHECK: encoding: [0x62,0x63,0xad,0x20,0x43,0x4a,0x7f,0x7b]
+ vshufi64x2 $0x7b, 4064(%rdx), %ymm26, %ymm25
+// CHECK: vshufi64x2 $123, 4096(%rdx), %ymm26, %ymm25
+// CHECK: encoding: [0x62,0x63,0xad,0x20,0x43,0x8a,0x00,0x10,0x00,0x00,0x7b]
+ vshufi64x2 $0x7b, 4096(%rdx), %ymm26, %ymm25
+// CHECK: vshufi64x2 $123, -4096(%rdx), %ymm26, %ymm25
+// CHECK: encoding: [0x62,0x63,0xad,0x20,0x43,0x4a,0x80,0x7b]
+ vshufi64x2 $0x7b, -4096(%rdx), %ymm26, %ymm25
+// CHECK: vshufi64x2 $123, -4128(%rdx), %ymm26, %ymm25
+// CHECK: encoding: [0x62,0x63,0xad,0x20,0x43,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+ vshufi64x2 $0x7b, -4128(%rdx), %ymm26, %ymm25
+// CHECK: vshufi64x2 $123, 1016(%rdx){1to4}, %ymm26, %ymm25
+// CHECK: encoding: [0x62,0x63,0xad,0x30,0x43,0x4a,0x7f,0x7b]
+ vshufi64x2 $0x7b, 1016(%rdx){1to4}, %ymm26, %ymm25
+// CHECK: vshufi64x2 $123, 1024(%rdx){1to4}, %ymm26, %ymm25
+// CHECK: encoding: [0x62,0x63,0xad,0x30,0x43,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vshufi64x2 $0x7b, 1024(%rdx){1to4}, %ymm26, %ymm25
+// CHECK: vshufi64x2 $123, -1024(%rdx){1to4}, %ymm26, %ymm25
+// CHECK: encoding: [0x62,0x63,0xad,0x30,0x43,0x4a,0x80,0x7b]
+ vshufi64x2 $0x7b, -1024(%rdx){1to4}, %ymm26, %ymm25
+// CHECK: vshufi64x2 $123, -1032(%rdx){1to4}, %ymm26, %ymm25
+// CHECK: encoding: [0x62,0x63,0xad,0x30,0x43,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vshufi64x2 $0x7b, -1032(%rdx){1to4}, %ymm26, %ymm25
+// CHECK: valignq $171, %xmm24, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0x83,0xed,0x00,0x03,0xd8,0xab]
+ valignq $0xab, %xmm24, %xmm18, %xmm19
+// CHECK: valignq $171, %xmm24, %xmm18, %xmm19 {%k5}
+// CHECK: encoding: [0x62,0x83,0xed,0x05,0x03,0xd8,0xab]
+ valignq $0xab, %xmm24, %xmm18, %xmm19 {%k5}
+// CHECK: valignq $171, %xmm24, %xmm18, %xmm19 {%k5} {z}
+// CHECK: encoding: [0x62,0x83,0xed,0x85,0x03,0xd8,0xab]
+ valignq $0xab, %xmm24, %xmm18, %xmm19 {%k5} {z}
+// CHECK: valignq $123, %xmm24, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0x83,0xed,0x00,0x03,0xd8,0x7b]
+ valignq $0x7b, %xmm24, %xmm18, %xmm19
+// CHECK: valignq $123, (%rcx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x00,0x03,0x19,0x7b]
+ valignq $0x7b, (%rcx), %xmm18, %xmm19
+// CHECK: valignq $123, 291(%rax,%r14,8), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xa3,0xed,0x00,0x03,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ valignq $0x7b, 291(%rax,%r14,8), %xmm18, %xmm19
+// CHECK: valignq $123, (%rcx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x10,0x03,0x19,0x7b]
+ valignq $0x7b, (%rcx){1to2}, %xmm18, %xmm19
+// CHECK: valignq $123, 2032(%rdx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x00,0x03,0x5a,0x7f,0x7b]
+ valignq $0x7b, 2032(%rdx), %xmm18, %xmm19
+// CHECK: valignq $123, 2048(%rdx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x00,0x03,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ valignq $0x7b, 2048(%rdx), %xmm18, %xmm19
+// CHECK: valignq $123, -2048(%rdx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x00,0x03,0x5a,0x80,0x7b]
+ valignq $0x7b, -2048(%rdx), %xmm18, %xmm19
+// CHECK: valignq $123, -2064(%rdx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x00,0x03,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ valignq $0x7b, -2064(%rdx), %xmm18, %xmm19
+// CHECK: valignq $123, 1016(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x10,0x03,0x5a,0x7f,0x7b]
+ valignq $0x7b, 1016(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: valignq $123, 1024(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x10,0x03,0x9a,0x00,0x04,0x00,0x00,0x7b]
+ valignq $0x7b, 1024(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: valignq $123, -1024(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x10,0x03,0x5a,0x80,0x7b]
+ valignq $0x7b, -1024(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: valignq $123, -1032(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x10,0x03,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+ valignq $0x7b, -1032(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: valignq $171, %ymm26, %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x03,0xbd,0x20,0x03,0xca,0xab]
+ valignq $0xab, %ymm26, %ymm24, %ymm25
+// CHECK: valignq $171, %ymm26, %ymm24, %ymm25 {%k2}
+// CHECK: encoding: [0x62,0x03,0xbd,0x22,0x03,0xca,0xab]
+ valignq $0xab, %ymm26, %ymm24, %ymm25 {%k2}
+// CHECK: valignq $171, %ymm26, %ymm24, %ymm25 {%k2} {z}
+// CHECK: encoding: [0x62,0x03,0xbd,0xa2,0x03,0xca,0xab]
+ valignq $0xab, %ymm26, %ymm24, %ymm25 {%k2} {z}
+// CHECK: valignq $123, %ymm26, %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x03,0xbd,0x20,0x03,0xca,0x7b]
+ valignq $0x7b, %ymm26, %ymm24, %ymm25
+// CHECK: valignq $123, (%rcx), %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x03,0x09,0x7b]
+ valignq $0x7b, (%rcx), %ymm24, %ymm25
+// CHECK: valignq $123, 291(%rax,%r14,8), %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x23,0xbd,0x20,0x03,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ valignq $0x7b, 291(%rax,%r14,8), %ymm24, %ymm25
+// CHECK: valignq $123, (%rcx){1to4}, %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x30,0x03,0x09,0x7b]
+ valignq $0x7b, (%rcx){1to4}, %ymm24, %ymm25
+// CHECK: valignq $123, 4064(%rdx), %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x03,0x4a,0x7f,0x7b]
+ valignq $0x7b, 4064(%rdx), %ymm24, %ymm25
+// CHECK: valignq $123, 4096(%rdx), %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x03,0x8a,0x00,0x10,0x00,0x00,0x7b]
+ valignq $0x7b, 4096(%rdx), %ymm24, %ymm25
+// CHECK: valignq $123, -4096(%rdx), %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x03,0x4a,0x80,0x7b]
+ valignq $0x7b, -4096(%rdx), %ymm24, %ymm25
+// CHECK: valignq $123, -4128(%rdx), %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x03,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+ valignq $0x7b, -4128(%rdx), %ymm24, %ymm25
+// CHECK: valignq $123, 1016(%rdx){1to4}, %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x30,0x03,0x4a,0x7f,0x7b]
+ valignq $0x7b, 1016(%rdx){1to4}, %ymm24, %ymm25
+// CHECK: valignq $123, 1024(%rdx){1to4}, %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x30,0x03,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ valignq $0x7b, 1024(%rdx){1to4}, %ymm24, %ymm25
+// CHECK: valignq $123, -1024(%rdx){1to4}, %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x30,0x03,0x4a,0x80,0x7b]
+ valignq $0x7b, -1024(%rdx){1to4}, %ymm24, %ymm25
+// CHECK: valignq $123, -1032(%rdx){1to4}, %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x30,0x03,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ valignq $0x7b, -1032(%rdx){1to4}, %ymm24, %ymm25
diff --git a/test/Object/Inputs/macho-invalid-header b/test/Object/Inputs/macho-invalid-header
new file mode 100644
index 0000000..da52d43
--- /dev/null
+++ b/test/Object/Inputs/macho-invalid-header
Binary files differ
diff --git a/test/Object/Inputs/macho64-invalid-incomplete-segment-load-command b/test/Object/Inputs/macho64-invalid-incomplete-segment-load-command
new file mode 100644
index 0000000..82ec724
--- /dev/null
+++ b/test/Object/Inputs/macho64-invalid-incomplete-segment-load-command
Binary files differ
diff --git a/test/Object/Inputs/no-start-symbol.elf-x86_64 b/test/Object/Inputs/no-start-symbol.elf-x86_64
new file mode 100644
index 0000000..b2c9edc
--- /dev/null
+++ b/test/Object/Inputs/no-start-symbol.elf-x86_64
Binary files differ
diff --git a/test/Object/X86/no-start-symbol.test b/test/Object/X86/no-start-symbol.test
new file mode 100644
index 0000000..b468894
--- /dev/null
+++ b/test/Object/X86/no-start-symbol.test
@@ -0,0 +1,9 @@
+RUN: llvm-objdump -d %p/../Inputs/no-start-symbol.elf-x86_64 | FileCheck %s
+Test that we disassemble the start of the section.
+CHECK: Disassembly of section .text:
+CHECK-NEXT: .text:
+CHECK-NEXT: 0: 90 nop
+CHECK: foo:
+CHECK-NEXT: 1: 90 nop
diff --git a/test/Object/macho-invalid.test b/test/Object/macho-invalid.test
index e2c9b6b..f4aa1e0 100644
--- a/test/Object/macho-invalid.test
+++ b/test/Object/macho-invalid.test
@@ -3,36 +3,40 @@ RUN: llvm-objdump -private-headers %p/Inputs/macho-invalid-zero-ncmds
RUN: not llvm-objdump -private-headers %p/Inputs/macho64-invalid-incomplete-load-command 2>&1 \
RUN: | FileCheck -check-prefix INCOMPLETE-LOADC %s
+INCOMPLETE-LOADC: Invalid data was encountered while parsing the file.
RUN: not llvm-objdump -private-headers %p/Inputs/macho-invalid-too-small-load-command 2>&1 \
RUN: | FileCheck -check-prefix SMALL-LOADC-SIZE %s
RUN: not llvm-objdump -private-headers %p/Inputs/macho64-invalid-too-small-load-command 2>&1 \
RUN: | FileCheck -check-prefix SMALL-LOADC-SIZE %s
+SMALL-LOADC-SIZE: Mach-O load command with size < 8 bytes
RUN: not llvm-objdump -private-headers %p/Inputs/macho-invalid-too-small-segment-load-command 2>&1 \
RUN: | FileCheck -check-prefix SMALL-SEGLOADC-SIZE %s
RUN: not llvm-objdump -private-headers %p/Inputs/macho64-invalid-too-small-segment-load-command 2>&1 \
RUN: | FileCheck -check-prefix SMALL-SEGLOADC-SIZE %s
+SMALL-SEGLOADC-SIZE: Mach-O segment load command size is too small
RUN: not llvm-objdump -private-headers %p/Inputs/macho-invalid-no-size-for-sections 2>&1 \
RUN: | FileCheck -check-prefix TOO-MANY-SECTS %s
RUN: not llvm-objdump -private-headers %p/Inputs/macho64-invalid-no-size-for-sections 2>&1 \
RUN: | FileCheck -check-prefix TOO-MANY-SECTS %s
+TOO-MANY-SECTS: Mach-O segment load command contains too many sections
RUN: not llvm-objdump -t %p/Inputs/macho-invalid-bad-symbol-index 2>&1 \
RUN: | FileCheck -check-prefix BAD-SYMBOL %s
+BAD-SYMBOL: Requested symbol index is out of range
RUN: not llvm-objdump -t %p/Inputs/macho-invalid-symbol-name-past-eof 2>&1 \
RUN: | FileCheck -check-prefix NAME-PAST-EOF %s
+NAME-PAST-EOF: Symbol name entry points before beginning or past end of file
RUN: not llvm-nm %p/Inputs/macho-invalid-section-index-getSectionRawName 2>&1 \
RUN: | FileCheck -check-prefix INVALID-SECTION-IDX-SYMBOL-SEC %s
+INVALID-SECTION-IDX-SYMBOL-SEC: getSymbolSection: Invalid section index
-SMALL-LOADC-SIZE: Load command with size < 8 bytes
-SMALL-SEGLOADC-SIZE: Segment load command size is too small
-INCOMPLETE-LOADC: Malformed MachO file
-TOO-MANY-SECTS: Number of sections too large for size of load command
-BAD-SYMBOL: Requested symbol index is out of range
-NAME-PAST-EOF: Symbol name entry points before beginning or past end of file
+RUN: not llvm-objdump -private-headers %p/Inputs/macho-invalid-header 2>&1 | FileCheck -check-prefix INVALID-HEADER %s
+INVALID-HEADER: Invalid data was encountered while parsing the file
-INVALID-SECTION-IDX-SYMBOL-SEC: getSymbolSection: Invalid section index
+RUN: not llvm-objdump -private-headers %p/Inputs/macho64-invalid-incomplete-segment-load-command 2>&1 | FileCheck -check-prefix INCOMPLETE-SEGMENT-LOADC %s
+INCOMPLETE-SEGMENT-LOADC: Invalid data was encountered while parsing the file
diff --git a/test/Object/obj2yaml.test b/test/Object/obj2yaml.test
index 2a3f7c8..08000f66 100644
--- a/test/Object/obj2yaml.test
+++ b/test/Object/obj2yaml.test
@@ -267,26 +267,19 @@ ELF-MIPSEL-NEXT: - Name: '$.str'
ELF-MIPSEL-NEXT: Section: .rodata.str1.1
ELF-MIPSEL-NEXT: Size: 0x000000000000000D
-ELF-MIPSEL-NEXT: - Name: .text
ELF-MIPSEL-NEXT: Section: .text
-ELF-MIPSEL-NEXT: - Name: .data
ELF-MIPSEL-NEXT: Section: .data
-ELF-MIPSEL-NEXT: - Name: .bss
ELF-MIPSEL-NEXT: Section: .bss
-ELF-MIPSEL-NEXT: - Name: .mdebug.abi32
ELF-MIPSEL-NEXT: Section: .mdebug.abi32
-ELF-MIPSEL-NEXT: - Name: .rodata.str1.1
ELF-MIPSEL-NEXT: Section: .rodata.str1.1
-ELF-MIPSEL-NEXT: - Name: .reginfo
ELF-MIPSEL-NEXT: Section: .reginfo
-ELF-MIPSEL-NEXT: - Name: .MIPS.abiflags
ELF-MIPSEL-NEXT: Section: .MIPS.abiflags
ELF-MIPSEL-NEXT: - Name: main
@@ -343,22 +336,17 @@ ELF-MIPS64EL-NEXT: AddressAlign: 0x0000000000000004
ELF-MIPS64EL-NEXT: Content: ''
-ELF-MIPS64EL-NEXT: - Name: .text
ELF-MIPS64EL-NEXT: Section: .text
-ELF-MIPS64EL-NEXT: - Name: .data
ELF-MIPS64EL-NEXT: Section: .data
-ELF-MIPS64EL-NEXT: - Name: .bss
ELF-MIPS64EL-NEXT: Section: .bss
ELF-MIPS64EL-NEXT: - Name: bar
ELF-MIPS64EL-NEXT: Section: .data
-ELF-MIPS64EL-NEXT: - Name: .MIPS.options
ELF-MIPS64EL-NEXT: Section: .MIPS.options
-ELF-MIPS64EL-NEXT: - Name: .pdr
ELF-MIPS64EL-NEXT: Section: .pdr
ELF-MIPS64EL-NEXT: - Name: zed
@@ -394,7 +382,7 @@ ELF-X86-64-NEXT: AddressAlign: 0x0000000000000008
ELF-X86-64-NEXT: Info: .text
ELF-X86-64-NEXT: Relocations:
ELF-X86-64-NEXT: - Offset: 0x000000000000000D
-ELF-X86-64-NEXT: Symbol: .rodata.str1.1
+ELF-X86-64-NEXT: Symbol: ''
ELF-X86-64-NEXT: Type: R_X86_64_32S
ELF-X86-64-NEXT: - Offset: 0x0000000000000012
ELF-X86-64-NEXT: Symbol: puts
@@ -408,14 +396,11 @@ ELF-X86-64-NEXT: Symbols:
ELF-X86-64-NEXT: Local:
ELF-X86-64-NEXT: - Name: trivial-object-test.s
-ELF-X86-64-NEXT: - Name: .text
ELF-X86-64-NEXT: Section: .text
-ELF-X86-64-NEXT: - Name: .rodata.str1.1
ELF-X86-64-NEXT: Section: .rodata.str1.1
-ELF-X86-64-NEXT: - Name: .note.GNU-stack
ELF-X86-64-NEXT: Section: .note.GNU-stack
ELF-X86-64-NEXT: Global:
ELF-X86-64-NEXT: - Name: main
diff --git a/test/Object/readobj-shared-object.test b/test/Object/readobj-shared-object.test
index 516d4c6..508caca 100644
--- a/test/Object/readobj-shared-object.test
+++ b/test/Object/readobj-shared-object.test
@@ -128,61 +128,61 @@ ELF: ]
ELF: Symbols [
ELF: Symbol {
-ELF: Name: .hash
+ELF: Name: (0)
ELF: Binding: Local
ELF: Type: Section
ELF: Section: .hash
ELF: }
ELF: Symbol {
-ELF: Name: .dynsym
+ELF: Name: (0)
ELF: Binding: Local
ELF: Type: Section
ELF: Section: .dynsym
ELF: }
ELF: Symbol {
-ELF: Name: .dynstr
+ELF: Name: (0)
ELF: Binding: Local
ELF: Type: Section
ELF: Section: .dynstr
ELF: }
ELF: Symbol {
-ELF: Name: .text
+ELF: Name: (0)
ELF: Binding: Local
ELF: Type: Section
ELF: Section: .text
ELF: }
ELF: Symbol {
-ELF: Name: .eh_frame
+ELF: Name: (0)
ELF: Binding: Local
ELF: Type: Section
ELF: Section: .eh_frame
ELF: }
ELF: Symbol {
-ELF: Name: .tdata
+ELF: Name: (0)
ELF: Binding: Local
ELF: Type: Section
ELF: Section: .tdata
ELF: }
ELF: Symbol {
-ELF: Name: .dynamic
+ELF: Name: (0)
ELF: Binding: Local
ELF: Type: Section
ELF: Section: .dynamic
ELF: }
ELF: Symbol {
-ELF: Name: .got.plt
+ELF: Name: (0)
ELF: Binding: Local
ELF: Type: Section
ELF: Section: .got.plt
ELF: }
ELF: Symbol {
-ELF: Name: .data
+ELF: Name: (0)
ELF: Binding: Local
ELF: Type: Section
ELF: Section: .data
ELF: }
ELF: Symbol {
-ELF: Name: .bss
+ELF: Name: (0)
ELF: Binding: Local
ELF: Type: Section
ELF: Section: .bss
diff --git a/test/Transforms/CorrelatedValuePropagation/select.ll b/test/Transforms/CorrelatedValuePropagation/select.ll
index 5501438..d88e3e4 100644
--- a/test/Transforms/CorrelatedValuePropagation/select.ll
+++ b/test/Transforms/CorrelatedValuePropagation/select.ll
@@ -51,3 +51,25 @@ else:
ret i8 %b
+@c = global i32 0, align 4
+@b = global i32 0, align 4
+; CHECK-LABEL: @PR23752(
+define i32 @PR23752() {
+ br label %for.body
+ %phi = phi i32 [ 0, %entry ], [ %sel, %for.body ]
+ %sel = select i1 icmp sgt (i32* @b, i32* @c), i32 %phi, i32 1
+ %cmp = icmp ne i32 %sel, 1
+ br i1 %cmp, label %for.body, label %if.end
+; CHECK: %[[sel:.*]] = select i1 icmp sgt (i32* @b, i32* @c), i32 0, i32 1
+; CHECK-NEXT: %[[cmp:.*]] = icmp ne i32 %[[sel]], 1
+; CHECK-NEXT: br i1 %[[cmp]]
+ ret i32 %sel
+; CHECK: ret i32 %[[sel]]
diff --git a/test/Transforms/GVN/unreachable_block_infinite_loop.ll b/test/Transforms/GVN/unreachable_block_infinite_loop.ll
index fca5a28..a47e9e4 100644
--- a/test/Transforms/GVN/unreachable_block_infinite_loop.ll
+++ b/test/Transforms/GVN/unreachable_block_infinite_loop.ll
@@ -12,3 +12,32 @@ unreachable_block:
ret i32 %a
+define i32 @pr23096_test0() {
+ br label %bb0
+ %ptr1 = ptrtoint i32* %ptr2 to i64
+ %ptr2 = inttoptr i64 %ptr1 to i32*
+ br i1 undef, label %bb0, label %bb1
+ %phi = phi i32* [ undef, %entry ], [ %ptr2, %bb1 ]
+ %load = load i32, i32* %phi
+ ret i32 %load
+define i32 @pr23096_test1() {
+ br label %bb0
+ %ptr1 = getelementptr i32, i32* %ptr2, i32 0
+ %ptr2 = getelementptr i32, i32* %ptr1, i32 0
+ br i1 undef, label %bb0, label %bb1
+ %phi = phi i32* [ undef, %entry ], [ %ptr2, %bb1 ]
+ %load = load i32, i32* %phi
+ ret i32 %load
diff --git a/test/Transforms/IndVarSimplify/exit_value_test2.ll b/test/Transforms/IndVarSimplify/exit_value_test2.ll
new file mode 100644
index 0000000..24e3e95
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/exit_value_test2.ll
@@ -0,0 +1,52 @@
+; PR23538
+; RUN: opt < %s -indvars -loop-deletion -S | FileCheck %s
+; Check that IndVarSimplify does not replace the exit value here, because
+; otherwise expansion would introduce a udiv and the cost would be high.
+; CHECK-LABEL: @_Z3fooPKcjj(
+; CHECK-NOT: udiv
+declare void @_Z3mixRjj(i32* dereferenceable(4), i32)
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) {
+ %a = alloca i32, align 4
+ %tmp = bitcast i32* %a to i8*
+ call void @llvm.lifetime.start(i64 4, i8* %tmp)
+ store i32 -1640531527, i32* %a, align 4
+ %cmp8 = icmp ugt i32 %len, 11
+ br i1 %cmp8, label, label %while.end
+ ; preds = %entry
+ br label %while.body
+while.body: ; preds = %while.body,
+ %keylen.010 = phi i32 [ %len, ], [ %sub, %while.body ]
+ %s.addr.09 = phi i8* [ %s, ], [ %add.ptr, %while.body ]
+ %tmp1 = bitcast i8* %s.addr.09 to i32*
+ %tmp2 = load i32, i32* %tmp1, align 4
+ %shl.i = shl i32 %tmp2, 1
+ %and.i = and i32 %shl.i, 16843008
+ %tmp3 = load i32, i32* %a, align 4
+ %sub.i = add i32 %tmp3, %tmp2
+ %add = sub i32 %sub.i, %and.i
+ store i32 %add, i32* %a, align 4
+ %add.ptr = getelementptr inbounds i8, i8* %s.addr.09, i64 12
+ %sub = add i32 %keylen.010, -12
+ %cmp = icmp ugt i32 %sub, 11
+ br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
+while.cond.while.end_crit_edge: ; preds = %while.body
+ %sub.lcssa = phi i32 [ %sub, %while.body ]
+ br label %while.end
+while.end: ; preds = %while.cond.while.end_crit_edge, %entry
+ %keylen.0.lcssa = phi i32 [ %sub.lcssa, %while.cond.while.end_crit_edge ], [ %len, %entry ]
+ call void @_Z3mixRjj(i32* dereferenceable(4) %a, i32 %keylen.0.lcssa)
+ %tmp4 = load i32, i32* %a, align 4
+ call void @llvm.lifetime.end(i64 4, i8* %tmp)
+ ret i32 %tmp4
diff --git a/test/Transforms/IndVarSimplify/exit_value_test3.ll b/test/Transforms/IndVarSimplify/exit_value_test3.ll
new file mode 100644
index 0000000..2051d2a
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/exit_value_test3.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -indvars -loop-deletion -S |FileCheck %s
+; Check that IndVarSimplify replaces the exit value even if the expansion cost
+; is high, because the loop can be deleted after the exit value is rewritten.
+; CHECK-LABEL: @_Z3fooPKcjj(
+; CHECK: udiv
+; CHECK: [[LABEL:^[a-zA-Z0-9_.]+]]:
+; CHECK-NOT: br {{.*}} [[LABEL]]
+define i32 @_Z3fooPKcjj(i8* nocapture readnone %s, i32 %len, i32 %c) #0 {
+ br label %while.cond
+while.cond: ; preds = %while.cond, %entry
+ %klen.0 = phi i32 [ %len, %entry ], [ %sub, %while.cond ]
+ %cmp = icmp ugt i32 %klen.0, 11
+ %sub = add i32 %klen.0, -12
+ br i1 %cmp, label %while.cond, label %while.end
+while.end: ; preds = %while.cond
+ %klen.0.lcssa = phi i32 [ %klen.0, %while.cond ]
+ ret i32 %klen.0.lcssa
diff --git a/test/Transforms/IndVarSimplify/lcssa-preservation.ll b/test/Transforms/IndVarSimplify/lcssa-preservation.ll
index f69c96c..5d502f3 100644
--- a/test/Transforms/IndVarSimplify/lcssa-preservation.ll
+++ b/test/Transforms/IndVarSimplify/lcssa-preservation.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -indvars -S | FileCheck %s
+; RUN: opt < %s -indvars -replexitval=always -S | FileCheck %s
; Make sure IndVars preserves LCSSA form, especially across loop nests.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll
index 8319624..93a64e6b 100644
--- a/test/Transforms/InstCombine/fpcast.ll
+++ b/test/Transforms/InstCombine/fpcast.ll
@@ -85,3 +85,11 @@ define float @test8(float %V) {
; CHECK-NEXT: %[[trunc:.*]] = fptrunc double %frem to float
; CHECK-NEXT: ret float %trunc
+; CHECK-LABEL: @test_fptrunc_fptrunc
+; CHECK-NOT: fptrunc double {{.*}} to half
+define half @test_fptrunc_fptrunc(double %V) {
+ %t1 = fptrunc double %V to float
+ %t2 = fptrunc float %t1 to half
+ ret half %t2
diff --git a/test/Transforms/InstCombine/load-bitcast32.ll b/test/Transforms/InstCombine/load-bitcast32.ll
new file mode 100644
index 0000000..b1c78a8
--- /dev/null
+++ b/test/Transforms/InstCombine/load-bitcast32.ll
@@ -0,0 +1,79 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target datalayout = "p:32:32:32"
+define i64* @test1(i8* %x) {
+; CHECK-LABEL: @test1(
+; CHECK: load i64, i64*
+; CHECK: ret
+ %a = bitcast i8* %x to i64*
+ %b = load i64, i64* %a
+ %c = inttoptr i64 %b to i64*
+ ret i64* %c
+define i32* @test2(i8* %x) {
+; CHECK-LABEL: @test2(
+; CHECK: load i32*, i32**
+; CHECK: ret
+ %a = bitcast i8* %x to i32*
+ %b = load i32, i32* %a
+ %c = inttoptr i32 %b to i32*
+ ret i32* %c
+define i64* @test3(i8* %x) {
+; CHECK-LABEL: @test3(
+; CHECK: load i64*, i64**
+; CHECK: ret
+ %a = bitcast i8* %x to i32*
+ %b = load i32, i32* %a
+ %c = inttoptr i32 %b to i64*
+ ret i64* %c
+define i64 @test4(i8* %x) {
+; CHECK-LABEL: @test4(
+; CHECK: load i32, i32*
+; CHECK: zext
+; CHECK: ret
+ %a = bitcast i8* %x to i64**
+ %b = load i64*, i64** %a
+ %c = ptrtoint i64* %b to i64
+ ret i64 %c
+define i32 @test5(i8* %x) {
+; CHECK-LABEL: @test5(
+; CHECK: load i32, i32*
+; CHECK: ret
+ %a = bitcast i8* %x to i32**
+ %b = load i32*, i32** %a
+ %c = ptrtoint i32* %b to i32
+ ret i32 %c
+define i64 @test6(i8* %x) {
+; CHECK-LABEL: @test6(
+; CHECK: load i32, i32*
+; CHECK: zext
+; CHECK: ret
+ %a = bitcast i8* %x to i32**
+ %b = load i32*, i32** %a
+ %c = ptrtoint i32* %b to i64
+ ret i64 %c
diff --git a/test/Transforms/InstCombine/load-bitcast64.ll b/test/Transforms/InstCombine/load-bitcast64.ll
new file mode 100644
index 0000000..d14c686
--- /dev/null
+++ b/test/Transforms/InstCombine/load-bitcast64.ll
@@ -0,0 +1,78 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target datalayout = "p:64:64:64"
+define i64* @test1(i8* %x) {
+; CHECK-LABEL: @test1(
+; CHECK: load i64*, i64**
+; CHECK: ret
+ %a = bitcast i8* %x to i64*
+ %b = load i64, i64* %a
+ %c = inttoptr i64 %b to i64*
+ ret i64* %c
+define i32* @test2(i8* %x) {
+; CHECK-LABEL: @test2(
+; CHECK: load i32, i32*
+; CHECK: ret
+ %a = bitcast i8* %x to i32*
+ %b = load i32, i32* %a
+ %c = inttoptr i32 %b to i32*
+ ret i32* %c
+define i64* @test3(i8* %x) {
+; CHECK-LABEL: @test3(
+; CHECK: load i32, i32*
+; CHECK: ret
+ %a = bitcast i8* %x to i32*
+ %b = load i32, i32* %a
+ %c = inttoptr i32 %b to i64*
+ ret i64* %c
+define i64 @test4(i8* %x) {
+; CHECK-LABEL: @test4(
+; CHECK: load i64, i64*
+; CHECK: ret
+ %a = bitcast i8* %x to i64**
+ %b = load i64*, i64** %a
+ %c = ptrtoint i64* %b to i64
+ ret i64 %c
+define i32 @test5(i8* %x) {
+; CHECK-LABEL: @test5(
+; CHECK: load i64, i64*
+; CHECK: trunc
+; CHECK: ret
+ %a = bitcast i8* %x to i32**
+ %b = load i32*, i32** %a
+ %c = ptrtoint i32* %b to i32
+ ret i32 %c
+define i64 @test6(i8* %x) {
+; CHECK-LABEL: @test6(
+; CHECK: load i64, i64*
+; CHECK: ret
+ %a = bitcast i8* %x to i32**
+ %b = load i32*, i32** %a
+ %c = ptrtoint i32* %b to i64
+ ret i64 %c
diff --git a/test/Transforms/InstCombine/pr23751.ll b/test/Transforms/InstCombine/pr23751.ll
new file mode 100644
index 0000000..d7840be
--- /dev/null
+++ b/test/Transforms/InstCombine/pr23751.ll
@@ -0,0 +1,13 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+@d = common global i32 0, align 4
+define i1 @f(i8 zeroext %p) #1 {
+; CHECK-NOT: ret i1 false
+ %1 = zext i8 %p to i32
+ %2 = load i32, i32* @d, align 4
+ %3 = or i32 %2, -2
+ %4 = add nsw i32 %3, %1
+ %5 = icmp ugt i32 %1, %4
+ ret i1 %5
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index e4bc96c..27e487b 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -1532,3 +1532,16 @@ define i32 @test_max_of_min(i32 %a) {
%s1 = select i1 %c1, i32 %s0, i32 -1
ret i32 %s1
+define i32 @PR23757(i32 %x) {
+; CHECK-LABEL: @PR23757
+; CHECK: %[[cmp:.*]] = icmp eq i32 %x, 2147483647
+; CHECK-NEXT: %[[add:.*]] = add nsw i32 %x, 1
+; CHECK-NEXT: %[[sel:.*]] = select i1 %[[cmp]], i32 -2147483648, i32 %[[add]]
+; CHECK-NEXT: ret i32 %[[sel]]
+ %cmp = icmp eq i32 %x, 2147483647
+ %add = add nsw i32 %x, 1
+ %sel = select i1 %cmp, i32 -2147483648, i32 %add
+ ret i32 %sel
diff --git a/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll b/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll
new file mode 100644
index 0000000..e5694fb
--- /dev/null
+++ b/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll
@@ -0,0 +1,58 @@
+; RUN: opt -S -loop-unroll < %s | FileCheck %s
+; LLVM should not try to fully unroll this loop.
+declare void @f()
+declare void @g()
+declare void @h()
+define void @trivial_loop() {
+; CHECK-LABEL: @trivial_loop(
+ entry:
+ br label %loop
+ loop:
+ %idx = phi i32 [ 0, %entry ], [, %loop ]
+ = add i32 %idx, 1
+ call void @f()
+ call void @g()
+ call void @h()
+ call void @f()
+ call void @g()
+ call void @h()
+ call void @f()
+ call void @g()
+ call void @h()
+ call void @f()
+ call void @g()
+ call void @h()
+ call void @f()
+ call void @g()
+ call void @h()
+ %be = icmp slt i32 %idx, 268435456
+ br i1 %be, label %loop, label %exit
+; CHECK: loop:
+; CHECK-NEXT: %idx = phi i32 [ 0, %entry ], [, %loop ]
+; CHECK-NEXT: = add i32 %idx, 1
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: call void @g()
+; CHECK-NEXT: call void @h()
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: call void @g()
+; CHECK-NEXT: call void @h()
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: call void @g()
+; CHECK-NEXT: call void @h()
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: call void @g()
+; CHECK-NEXT: call void @h()
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: call void @g()
+; CHECK-NEXT: call void @h()
+; CHECK-NEXT: %be = icmp slt i32 %idx, 268435456
+; CHECK-NEXT: br i1 %be, label %loop, label %exit
+ exit:
+ ret void
diff --git a/test/Transforms/LoopUnroll/full-unroll-bad-geps.ll b/test/Transforms/LoopUnroll/full-unroll-bad-geps.ll
index 4c99bc7..ac81452 100644
--- a/test/Transforms/LoopUnroll/full-unroll-bad-geps.ll
+++ b/test/Transforms/LoopUnroll/full-unroll-bad-geps.ll
@@ -1,5 +1,5 @@
; Check that we don't crash on corner cases.
-; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=10 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=20 -o /dev/null
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -o /dev/null
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
define void @foo1() {
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics.ll
index 2dab2fb..904a65a 100644
--- a/test/Transforms/LoopUnroll/full-unroll-heuristics.ll
+++ b/test/Transforms/LoopUnroll/full-unroll-heuristics.ll
@@ -1,8 +1,8 @@
; In this test we check how heuristics for complete unrolling work. We have
; three knobs:
; 1) -unroll-threshold
-; 2) -unroll-absolute-threshold and
-; 3) -unroll-percent-of-optimized-for-complete-unroll
+; 2) -unroll-percent-dynamic-cost-saved-threshold and
+; 3) -unroll-dynamic-cost-savings-discount
; They control loop-unrolling according to the following rules:
; * If size of unrolled loop exceeds the absoulte threshold, we don't unroll
@@ -17,10 +17,10 @@
; optimizations to remove ~55% of the instructions, the loop body size is 9,
; and unrolled size is 65.
-; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=10 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=20 | FileCheck %s -check-prefix=TEST1
-; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=100 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=20 | FileCheck %s -check-prefix=TEST2
-; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=100 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=80 | FileCheck %s -check-prefix=TEST3
-; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=100 -unroll-threshold=100 -unroll-percent-of-optimized-for-complete-unroll=80 | FileCheck %s -check-prefix=TEST4
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -unroll-dynamic-cost-savings-discount=0 | FileCheck %s -check-prefix=TEST1
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s -check-prefix=TEST2
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=80 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s -check-prefix=TEST3
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=100 -unroll-percent-dynamic-cost-saved-threshold=80 -unroll-dynamic-cost-savings-discount=0 | FileCheck %s -check-prefix=TEST4
; If the absolute threshold is too low, or if we can't optimize away requested
; percent of instructions, we shouldn't unroll:
diff --git a/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll b/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
index 4cd703f..f16ee41 100644
--- a/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
+++ b/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S < %s -loop-vectorize 2>&1 | FileCheck %s
-; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s --check-prefix=FORCE-VEC
+; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses=true | FileCheck %s
+; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -enable-interleaved-mem-accesses=true | FileCheck %s --check-prefix=FORCE-VEC
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnueabi"
@@ -102,26 +102,23 @@ for.end: ; preds = %for.body
; }
; CHECK-LABEL: @ptr_ind_plus2(
-; CHECK: load i32, i32*
-; CHECK: load i32, i32*
-; CHECK: load i32, i32*
-; CHECK: load i32, i32*
-; CHECK: mul nsw i32
-; CHECK: mul nsw i32
-; CHECK: add nsw i32
-; CHECK: add nsw i32
-; CHECK: = add i64 %index, 2
-; CHECK: %21 = icmp eq i64, 1024
+; CHECK: %[[V0:.*]] = load <8 x i32>
+; CHECK: shufflevector <8 x i32> %[[V0]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: shufflevector <8 x i32> %[[V0]], <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK: %[[V1:.*]] = load <8 x i32>
+; CHECK: shufflevector <8 x i32> %[[V1]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: shufflevector <8 x i32> %[[V1]], <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK: mul nsw <4 x i32>
+; CHECK: mul nsw <4 x i32>
+; CHECK: add nsw <4 x i32>
+; CHECK: add nsw <4 x i32>
+; CHECK: = add i64 %index, 8
+; CHECK: icmp eq i64, 1024
; FORCE-VEC-LABEL: @ptr_ind_plus2(
-; FORCE-VEC: load i32, i32*
-; FORCE-VEC: insertelement <2 x i32>
-; FORCE-VEC: load i32, i32*
-; FORCE-VEC: insertelement <2 x i32>
-; FORCE-VEC: load i32, i32*
-; FORCE-VEC: insertelement <2 x i32>
-; FORCE-VEC: load i32, i32*
-; FORCE-VEC: insertelement <2 x i32>
+; FORCE-VEC: %[[V:.*]] = load <4 x i32>
+; FORCE-VEC: shufflevector <4 x i32> %[[V]], <4 x i32> undef, <2 x i32> <i32 0, i32 2>
+; FORCE-VEC: shufflevector <4 x i32> %[[V]], <4 x i32> undef, <2 x i32> <i32 1, i32 3>
; FORCE-VEC: mul nsw <2 x i32>
; FORCE-VEC: add nsw <2 x i32>
; FORCE-VEC: = add i64 %index, 2
diff --git a/test/Transforms/LoopVectorize/interleaved-accesses.ll b/test/Transforms/LoopVectorize/interleaved-accesses.ll
new file mode 100644
index 0000000..d7237a5
--- /dev/null
+++ b/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -0,0 +1,467 @@
+; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+; Check vectorization on an interleaved load group of factor 2 and an interleaved
+; store group of factor 2.
+; int AB[1024];
+; int CD[1024];
+; void test_array_load2_store2(int C, int D) {
+; for (int i = 0; i < 1024; i+=2) {
+; int A = AB[i];
+; int B = AB[i+1];
+; CD[i] = A + C;
+; CD[i+1] = B * D;
+; }
+; }
+; CHECK-LABEL: @test_array_load2_store2(
+; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
+; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK: add nsw <4 x i32>
+; CHECK: mul nsw <4 x i32>
+; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4
+@AB = common global [1024 x i32] zeroinitializer, align 4
+@CD = common global [1024 x i32] zeroinitializer, align 4
+define void @test_array_load2_store2(i32 %C, i32 %D) {
+ br label %for.body
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %indvars.iv
+ %tmp = load i32, i32* %arrayidx0, align 4
+ %tmp1 = or i64 %indvars.iv, 1
+ %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %tmp1
+ %tmp2 = load i32, i32* %arrayidx1, align 4
+ %add = add nsw i32 %tmp, %C
+ %mul = mul nsw i32 %tmp2, %D
+ %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %tmp1
+ store i32 %mul, i32* %arrayidx3, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp slt i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.end
+for.end: ; preds = %for.body
+ ret void
+; int A[3072];
+; struct ST3 S[1024];
+; void test_struct_array_load3_store3() {
+; int *ptr = A;
+; for (int i = 0; i < 1024; i++) {
+; int X1 = *ptr++;
+; int X2 = *ptr++;
+; int X3 = *ptr++;
+; S[i].x = X1 + 1;
+; S[i].y = X2 + 2;
+; S[i].z = X3 + 3;
+; }
+; }
+; CHECK-LABEL: @test_struct_array_load3_store3(
+; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
+; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; CHECK: add nsw <4 x i32> {{.*}}, <i32 1, i32 1, i32 1, i32 1>
+; CHECK: add nsw <4 x i32> {{.*}}, <i32 2, i32 2, i32 2, i32 2>
+; CHECK: add nsw <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
+; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* {{.*}}, align 4
+%struct.ST3 = type { i32, i32, i32 }
+@A = common global [3072 x i32] zeroinitializer, align 4
+@S = common global [1024 x %struct.ST3] zeroinitializer, align 4
+define void @test_struct_array_load3_store3() {
+ br label %for.body
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %ptr.016 = phi i32* [ getelementptr inbounds ([3072 x i32], [3072 x i32]* @A, i64 0, i64 0), %entry ], [ %incdec.ptr2, %for.body ]
+ %incdec.ptr = getelementptr inbounds i32, i32* %ptr.016, i64 1
+ %tmp = load i32, i32* %ptr.016, align 4
+ %incdec.ptr1 = getelementptr inbounds i32, i32* %ptr.016, i64 2
+ %tmp1 = load i32, i32* %incdec.ptr, align 4
+ %incdec.ptr2 = getelementptr inbounds i32, i32* %ptr.016, i64 3
+ %tmp2 = load i32, i32* %incdec.ptr1, align 4
+ %add = add nsw i32 %tmp, 1
+ %x = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 0
+ store i32 %add, i32* %x, align 4
+ %add3 = add nsw i32 %tmp1, 2
+ %y = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 1
+ store i32 %add3, i32* %y, align 4
+ %add6 = add nsw i32 %tmp2, 3
+ %z = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 2
+ store i32 %add6, i32* %z, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+for.end: ; preds = %for.body
+ ret void
+; Check vectorization on an interleaved load group of factor 4.
+; struct ST4{
+; int x;
+; int y;
+; int z;
+; int w;
+; };
+; int test_struct_load4(struct ST4 *S) {
+; int r = 0;
+; for (int i = 0; i < 1024; i++) {
+; r += S[i].x;
+; r -= S[i].y;
+; r += S[i].z;
+; r -= S[i].w;
+; }
+; return r;
+; }
+; CHECK-LABEL: @test_struct_load4(
+; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* {{.*}}, align 4
+; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK: add nsw <4 x i32>
+; CHECK: sub <4 x i32>
+; CHECK: add nsw <4 x i32>
+; CHECK: sub <4 x i32>
+%struct.ST4 = type { i32, i32, i32, i32 }
+define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) {
+ br label %for.body
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %r.022 = phi i32 [ 0, %entry ], [ %sub8, %for.body ]
+ %x = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 0
+ %tmp = load i32, i32* %x, align 4
+ %add = add nsw i32 %tmp, %r.022
+ %y = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 1
+ %tmp1 = load i32, i32* %y, align 4
+ %sub = sub i32 %add, %tmp1
+ %z = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 2
+ %tmp2 = load i32, i32* %z, align 4
+ %add5 = add nsw i32 %sub, %tmp2
+ %w = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 3
+ %tmp3 = load i32, i32* %w, align 4
+ %sub8 = sub i32 %add5, %tmp3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+for.end: ; preds = %for.body
+ ret i32 %sub8
+; Check vectorization on an interleaved store group of factor 4.
+; void test_struct_store4(int *A, struct ST4 *B) {
+; int *ptr = A;
+; for (int i = 0; i < 1024; i++) {
+; int X = *ptr++;
+; B[i].x = X + 1;
+; B[i].y = X * 2;
+; B[i].z = X + 3;
+; B[i].w = X + 4;
+; }
+; }
+; CHECK-LABEL: @test_struct_store4(
+; CHECK: %[[LD:.*]] = load <4 x i32>, <4 x i32>*
+; CHECK: add nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK: shl nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK: add nsw <4 x i32> %[[LD]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK: add nsw <4 x i32> %[[LD]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK: store <16 x i32> %interleaved.vec, <16 x i32>* {{.*}}, align 4
+define void @test_struct_store4(i32* noalias nocapture readonly %A, %struct.ST4* noalias nocapture %B) {
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %ptr.024 = phi i32* [ %A, %entry ], [ %incdec.ptr, %for.body ]
+ %incdec.ptr = getelementptr inbounds i32, i32* %ptr.024, i64 1
+ %tmp = load i32, i32* %ptr.024, align 4
+ %add = add nsw i32 %tmp, 1
+ %x = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 0
+ store i32 %add, i32* %x, align 4
+ %mul = shl nsw i32 %tmp, 1
+ %y = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 1
+ store i32 %mul, i32* %y, align 4
+ %add3 = add nsw i32 %tmp, 3
+ %z = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 2
+ store i32 %add3, i32* %z, align 4
+ %add6 = add nsw i32 %tmp, 4
+ %w = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 3
+ store i32 %add6, i32* %w, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+; Check vectorization on a reverse interleaved load group of factor 2 and
+; a reverse interleaved store group of factor 2.
+; struct ST2 {
+; int x;
+; int y;
+; };
+; void test_reversed_load2_store2(struct ST2 *A, struct ST2 *B) {
+; for (int i = 1023; i >= 0; i--) {
+; int a = A[i].x + i; // interleaved load of index 0
+; int b = A[i].y - i; // interleaved load of index 1
+; B[i].x = a; // interleaved store of index 0
+; B[i].y = b; // interleaved store of index 1
+; }
+; }
+; CHECK-LABEL: @test_reversed_load2_store2(
+; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
+; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK: add nsw <4 x i32>
+; CHECK: sub nsw <4 x i32>
+; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4
+%struct.ST2 = type { i32, i32 }
+define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly %A, %struct.ST2* noalias nocapture %B) {
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ]
+ %x = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 0
+ %tmp = load i32, i32* %x, align 4
+ %tmp1 = trunc i64 %indvars.iv to i32
+ %add = add nsw i32 %tmp, %tmp1
+ %y = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 1
+ %tmp2 = load i32, i32* %y, align 4
+ %sub = sub nsw i32 %tmp2, %tmp1
+ %x5 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 0
+ store i32 %add, i32* %x5, align 4
+ %y8 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 1
+ store i32 %sub, i32* %y8, align 4
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ %cmp = icmp sgt i64 %indvars.iv, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+; Check vectorization on an interleaved load group of factor 2 with 1 gap
+; (missing the load of odd elements).
+; void even_load(int *A, int *B) {
+; for (unsigned i = 0; i < 1024; i+=2)
+; B[i/2] = A[i] * 2;
+; }
+; CHECK-LABEL: @even_load(
+; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
+; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK: shl nsw <4 x i32> %strided.vec, <i32 1, i32 1, i32 1, i32 1>
+define void @even_load(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %tmp = load i32, i32* %arrayidx, align 4
+ %mul = shl nsw i32 %tmp, 1
+ %tmp1 = lshr exact i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1
+ store i32 %mul, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp ult i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+; Check vectorization on interleaved access groups identified from mixed
+; loads/stores.
+; void mixed_load2_store2(int *A, int *B) {
+; for (unsigned i = 0; i < 1024; i+=2) {
+; B[i] = A[i] * A[i+1];
+; B[i+1] = A[i] + A[i+1];
+; }
+; }
+; CHECK-LABEL: @mixed_load2_store2(
+; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
+; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK: %interleaved.vec = shufflevector <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK: store <8 x i32> %interleaved.vec
+define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %tmp = load i32, i32* %arrayidx, align 4
+ %tmp1 = or i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %tmp1
+ %tmp2 = load i32, i32* %arrayidx2, align 4
+ %mul = mul nsw i32 %tmp2, %tmp
+ %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ store i32 %mul, i32* %arrayidx4, align 4
+ %tmp3 = load i32, i32* %arrayidx, align 4
+ %tmp4 = load i32, i32* %arrayidx2, align 4
+ %add10 = add nsw i32 %tmp4, %tmp3
+ %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 %tmp1
+ store i32 %add10, i32* %arrayidx13, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp ult i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+; Check vectorization on interleaved access groups identified from mixed
+; loads/stores.
+; void mixed_load3_store3(int *A) {
+; for (unsigned i = 0; i < 1024; i++) {
+; *A++ += i;
+; *A++ += i;
+; *A++ += i;
+; }
+; }
+; CHECK-LABEL: @mixed_load3_store3(
+; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
+; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; CHECK: %interleaved.vec = shufflevector <8 x i32> %{{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* %{{.*}}, align 4
+define void @mixed_load3_store3(i32* nocapture %A) {
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ ret void
+for.body: ; preds = %for.body, %entry
+ %i.013 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %A.addr.012 = phi i32* [ %A, %entry ], [ %incdec.ptr3, %for.body ]
+ %incdec.ptr = getelementptr inbounds i32, i32* %A.addr.012, i64 1
+ %tmp = load i32, i32* %A.addr.012, align 4
+ %add = add i32 %tmp, %i.013
+ store i32 %add, i32* %A.addr.012, align 4
+ %incdec.ptr1 = getelementptr inbounds i32, i32* %A.addr.012, i64 2
+ %tmp1 = load i32, i32* %incdec.ptr, align 4
+ %add2 = add i32 %tmp1, %i.013
+ store i32 %add2, i32* %incdec.ptr, align 4
+ %incdec.ptr3 = getelementptr inbounds i32, i32* %A.addr.012, i64 3
+ %tmp2 = load i32, i32* %incdec.ptr1, align 4
+ %add4 = add i32 %tmp2, %i.013
+ store i32 %add4, i32* %incdec.ptr1, align 4
+ %inc = add nuw nsw i32 %i.013, 1
+ %exitcond = icmp eq i32 %inc, 1024
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+; Check vectorization on interleaved access groups whose members have different
+; types.
+; struct IntFloat {
+; int a;
+; float b;
+; };
+; int SA;
+; float SB;
+; void int_float_struct(struct IntFloat *A) {
+; int SumA;
+; float SumB;
+; for (unsigned i = 0; i < 1024; i++) {
+; SumA += A[i].a;
+; SumB += A[i].b;
+; }
+; SA = SumA;
+; SB = SumB;
+; }
+; CHECK-LABEL: @int_float_struct(
+; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
+; CHECK: %[[V0:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: %[[V1:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK: bitcast <4 x i32> %[[V1]] to <4 x float>
+; CHECK: add nsw <4 x i32>
+; CHECK: fadd fast <4 x float>
+%struct.IntFloat = type { i32, float }
+@SA = common global i32 0, align 4
+@SB = common global float 0.000000e+00, align 4
+define void @int_float_struct(%struct.IntFloat* nocapture readonly %A) #0 {
+ br label %for.body
+for.cond.cleanup: ; preds = %for.body
+ store i32 %add, i32* @SA, align 4
+ store float %add3, float* @SB, align 4
+ ret void
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %SumB.014 = phi float [ undef, %entry ], [ %add3, %for.body ]
+ %SumA.013 = phi i32 [ undef, %entry ], [ %add, %for.body ]
+ %a = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 0
+ %tmp = load i32, i32* %a, align 4
+ %add = add nsw i32 %tmp, %SumA.013
+ %b = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 1
+ %tmp1 = load float, float* %b, align 4
+ %add3 = fadd fast float %SumB.014, %tmp1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/Transforms/LoopVectorize/zero-sized-pointee-crash.ll b/test/Transforms/LoopVectorize/zero-sized-pointee-crash.ll
new file mode 100644
index 0000000..8771dd2
--- /dev/null
+++ b/test/Transforms/LoopVectorize/zero-sized-pointee-crash.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; CHECK-LABEL: @fn1
+define void @fn1() {
+ br label %middle
+ %0 = phi {}* [ %3, %middle ], [ inttoptr (i64 0 to {}*), %entry-block ]
+ %1 = bitcast {}* %0 to i8*
+ %2 = getelementptr i8, i8* %1, i64 1
+ %3 = bitcast i8* %2 to {}*
+ %4 = icmp eq i8* %2, undef
+ br i1 %4, label %exit, label %middle
+; CHECK: %[[phi:.*]] = phi {}* [ %3, %middle ], [ null, %entry-block ]
+; CHECK-NEXT: %[[bc1:.*]] = bitcast {}* %[[phi]] to i8*
+; CHECK-NEXT: %[[gep:.*]] = getelementptr i8, i8* %[[bc1]], i64 1
+; CHECK-NEXT: %[[bc2:.*]] = bitcast i8* %[[gep]] to {}*
+; CHECK-NEXT: %[[cmp:.*]] = icmp eq i8* %[[gep]], undef
+; CHECK-NEXT: br i1 %[[cmp]],
+ ret void
diff --git a/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
index b2083cb..99eba5e 100644
--- a/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
+++ b/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
@@ -63,16 +63,16 @@ lpad:
resume { i8*, i32 } zeroinitializer
-define i8 @call_same_range() {
-; CHECK-LABEL: @call_same_range
+define i8 @call_with_same_range() {
+; CHECK-LABEL: @call_with_same_range
; CHECK: tail call i8 @call_with_range
bitcast i8 0 to i8
%out = call i8 @dummy(), !range !0
ret i8 %out
-define i8 @invoke_same_range() {
-; CHECK-LABEL: @invoke_same_range()
+define i8 @invoke_with_same_range() {
+; CHECK-LABEL: @invoke_with_same_range()
; CHECK: tail call i8 @invoke_with_range()
%out = invoke i8 @dummy() to label %next unwind label %lpad, !range !0
diff --git a/test/Transforms/MergeFunc/linkonce_odr.ll b/test/Transforms/MergeFunc/linkonce_odr.ll
new file mode 100644
index 0000000..1ad0d72
--- /dev/null
+++ b/test/Transforms/MergeFunc/linkonce_odr.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+; Replacements should be totally ordered on the function name.
+; If we don't do this we can end up with one module defining a thunk for @funA
+; and another module defining a thunk for @funB.
+; The problem with this is that the linker could then choose these two stubs,
+; one from each of the two modules, and we end up with two stubs calling each other.
+; CHECK-LABEL: define linkonce_odr i32 @funA
+; CHECK-NEXT: add
+; CHECK: ret
+; CHECK-LABEL: define linkonce_odr i32 @funB
+; CHECK-NEXT: tail call i32 @funA(i32 %0, i32 %1)
+; CHECK-NEXT: ret
+define linkonce_odr i32 @funB(i32 %x, i32 %y) {
+ %sum = add i32 %x, %y
+ %sum2 = add i32 %x, %sum
+ %sum3 = add i32 %x, %sum2
+ ret i32 %sum3
+define linkonce_odr i32 @funA(i32 %x, i32 %y) {
+ %sum = add i32 %x, %y
+ %sum2 = add i32 %x, %sum
+ %sum3 = add i32 %x, %sum2
+ ret i32 %sum3
diff --git a/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll b/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
index a620c98..d08c6f6 100644
--- a/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
+++ b/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+; RUN: opt < %s -nary-reassociate -early-cse -S | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-unknown-unknown"
@@ -27,24 +27,37 @@ define void @reassociate_gep(float* %a, i64 %i, i64 %j) {
; foo(&a[sext(j)]);
; foo(&a[sext(i +nsw j)]);
+; foo(&a[sext((i +nsw j) +nsw i)]);
; =>
-; t = &a[sext(j)];
-; foo(t);
-; foo(t + sext(i));
+; t1 = &a[sext(j)];
+; foo(t1);
+; t2 = t1 + sext(i);
+; foo(t2);
+; t3 = t2 + sext(i); // sext(i) should be GVN'ed.
+; foo(t3);
define void @reassociate_gep_nsw(float* %a, i32 %i, i32 %j) {
; CHECK-LABEL: @reassociate_gep_nsw(
- %1 = add nsw i32 %i, %j
- %idxprom.1 = sext i32 %1 to i64
%idxprom.j = sext i32 %j to i64
- %2 = getelementptr float, float* %a, i64 %idxprom.j
+ %1 = getelementptr float, float* %a, i64 %idxprom.j
; CHECK: [[t1:[^ ]+]] = getelementptr float, float* %a, i64 %idxprom.j
- call void @foo(float* %2)
+ call void @foo(float* %1)
; CHECK: call void @foo(float* [[t1]])
- %3 = getelementptr float, float* %a, i64 %idxprom.1
+ %2 = add nsw i32 %i, %j
+ %idxprom.2 = sext i32 %2 to i64
+ %3 = getelementptr float, float* %a, i64 %idxprom.2
; CHECK: [[sexti:[^ ]+]] = sext i32 %i to i64
; CHECK: [[t2:[^ ]+]] = getelementptr float, float* [[t1]], i64 [[sexti]]
call void @foo(float* %3)
; CHECK: call void @foo(float* [[t2]])
+ %4 = add nsw i32 %2, %i
+ %idxprom.4 = sext i32 %4 to i64
+ %5 = getelementptr float, float* %a, i64 %idxprom.4
+; CHECK: [[t3:[^ ]+]] = getelementptr float, float* [[t2]], i64 [[sexti]]
+ call void @foo(float* %5)
+; CHECK: call void @foo(float* [[t3]])
ret void
diff --git a/test/Transforms/Reassociate/basictest.ll b/test/Transforms/Reassociate/basictest.ll
index 015d3b0..caaf772 100644
--- a/test/Transforms/Reassociate/basictest.ll
+++ b/test/Transforms/Reassociate/basictest.ll
@@ -202,8 +202,8 @@ define i32 @test14(i32 %X1, i32 %X2) {
ret i32 %D
; CHECK-LABEL: @test14
-; CHECK-NEXT: sub i32 %X1, %X2
-; CHECK-NEXT: mul i32 %B2, 47
+; CHECK-NEXT: %[[SUB:.*]] = sub i32 %X1, %X2
+; CHECK-NEXT: mul i32 %[[SUB]], 47
; CHECK-NEXT: ret i32
diff --git a/test/Transforms/Reassociate/canonicalize-neg-const.ll b/test/Transforms/Reassociate/canonicalize-neg-const.ll
index e85a963..465460c 100644
--- a/test/Transforms/Reassociate/canonicalize-neg-const.ll
+++ b/test/Transforms/Reassociate/canonicalize-neg-const.ll
@@ -49,18 +49,6 @@ define double @test3(double %x, double %y) {
ret double %mul3
-; Canonicalize (x - -1234 * y)
-define i64 @test4(i64 %x, i64 %y) {
-; CHECK-LABEL: @test4
-; CHECK-NEXT: mul i64 %y, 1234
-; CHECK-NEXT: add i64 %mul, %x
-; CHECK-NEXT: ret i64 %sub
- %mul = mul i64 %y, -1234
- %sub = sub i64 %x, %mul
- ret i64 %sub
; Canonicalize (x - -0.1234 * y)
define double @test5(double %x, double %y) {
; CHECK-LABEL: @test5
@@ -156,3 +144,13 @@ define double @test12(double %x, double %y) {
%add = fadd double %div, %x
ret double %add
+; Don't create an NSW violation
+define i4 @test13(i4 %x) {
+; CHECK-LABEL: @test13
+; CHECK-NEXT: %[[mul:.*]] = mul nsw i4 %x, -2
+; CHECK-NEXT: %[[add:.*]] = add i4 %[[mul]], 3
+ %mul = mul nsw i4 %x, -2
+ %add = add i4 %mul, 3
+ ret i4 %add
diff --git a/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll b/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll
new file mode 100644
index 0000000..5913db2
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll
@@ -0,0 +1,77 @@
+; RUN: opt -S -rewrite-statepoints-for-gc < %s | FileCheck %s
+declare void @foo()
+declare i8 addrspace(1)* @some_function()
+declare void @some_function_consumer(i8 addrspace(1)*)
+declare dereferenceable(4) i8 addrspace(1)* @some_function_ret_deref()
+; CHECK: declare i8 addrspace(1)* @some_function_ret_deref()
+define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* dereferenceable(4) %a) gc "statepoint-example" {
+; CHECK: define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* %a)
+ call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %a
+define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* dereferenceable_or_null(4) %a) gc "statepoint-example" {
+; CHECK: define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* %a)
+ call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %a
+define i8 addrspace(1)* @test_deref_retval() gc "statepoint-example" {
+; CHECK-LABEL: @test_deref_retval(
+ %a = call dereferenceable(4) i8 addrspace(1)* @some_function()
+; CHECK: %a = call i8 addrspace(1)* @some_function()
+ call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %a
+define i8 addrspace(1)* @test_deref_or_null_retval() gc "statepoint-example" {
+; CHECK-LABEL: @test_deref_or_null_retval(
+ %a = call dereferenceable_or_null(4) i8 addrspace(1)* @some_function()
+; CHECK: %a = call i8 addrspace(1)* @some_function()
+ call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %a
+define i8 @test_md(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_md(
+ entry:
+; CHECK: %tmp = load i8, i8 addrspace(1)* %ptr, !tbaa !0
+ %tmp = load i8, i8 addrspace(1)* %ptr, !tbaa !0
+ call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 %tmp
+define i8 addrspace(1)* @test_decl_only_attribute(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_decl_only_attribute(
+; No change here, but the prototype of some_function_ret_deref should have changed.
+; CHECK: call i8 addrspace(1)* @some_function_ret_deref()
+ %a = call i8 addrspace(1)* @some_function_ret_deref()
+ call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %a
+define i8 addrspace(1)* @test_callsite_arg_attribute(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_callsite_arg_attribute(
+; CHECK: call void @some_function_consumer(i8 addrspace(1)* %ptr)
+ call void @some_function_consumer(i8 addrspace(1)* dereferenceable(4) %ptr)
+ call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %ptr
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+!0 = !{!1, !1, i64 0, i64 1}
+!1 = !{!"red", !2}
+!2 = !{!"blue"}
+; CHECK: !0 = !{!1, !1, i64 0}
+; CHECK: !1 = !{!"red", !2}
+; CHECK: !2 = !{!"blue"}
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/R600/lit.local.cfg b/test/Transforms/SeparateConstOffsetFromGEP/R600/lit.local.cfg
new file mode 100644
index 0000000..4086e8d
--- /dev/null
+++ b/test/Transforms/SeparateConstOffsetFromGEP/R600/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'R600' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/R600/split-gep-and-gvn-addrspace-addressing-modes.ll b/test/Transforms/SeparateConstOffsetFromGEP/R600/split-gep-and-gvn-addrspace-addressing-modes.ll
new file mode 100644
index 0000000..527634d
--- /dev/null
+++ b/test/Transforms/SeparateConstOffsetFromGEP/R600/split-gep-and-gvn-addrspace-addressing-modes.ll
@@ -0,0 +1,94 @@
+; RUN: opt -mtriple=amdgcn-- -S -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -gvn < %s | FileCheck -check-prefix=IR %s
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+@array = internal addrspace(2) constant [4096 x [32 x float]] zeroinitializer, align 4
+; IR-LABEL: @sum_of_array(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 1
+; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 32
+; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 33
+define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
+ %tmp = sext i32 %y to i64
+ %tmp1 = sext i32 %x to i64
+ %tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp
+ %tmp4 = load float, float addrspace(2)* %tmp2, align 4
+ %tmp5 = fadd float %tmp4, 0.000000e+00
+ %tmp6 = add i32 %y, 1
+ %tmp7 = sext i32 %tmp6 to i64
+ %tmp8 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp7
+ %tmp10 = load float, float addrspace(2)* %tmp8, align 4
+ %tmp11 = fadd float %tmp5, %tmp10
+ %tmp12 = add i32 %x, 1
+ %tmp13 = sext i32 %tmp12 to i64
+ %tmp14 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp
+ %tmp16 = load float, float addrspace(2)* %tmp14, align 4
+ %tmp17 = fadd float %tmp11, %tmp16
+ %tmp18 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp7
+ %tmp20 = load float, float addrspace(2)* %tmp18, align 4
+ %tmp21 = fadd float %tmp17, %tmp20
+ store float %tmp21, float addrspace(1)* %output, align 4
+ ret void
+@array2 = internal addrspace(2) constant [4096 x [4 x float]] zeroinitializer, align 4
+; Some of the indices go over the maximum mubuf offset, so don't split them.
+; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 255
+; IR: add i32 %x, 256
+; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
+ %tmp = sext i32 %y to i64
+ %tmp1 = sext i32 %x to i64
+ %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp
+ %tmp4 = load float, float addrspace(2)* %tmp2, align 4
+ %tmp5 = fadd float %tmp4, 0.000000e+00
+ %tmp6 = add i32 %y, 255
+ %tmp7 = sext i32 %tmp6 to i64
+ %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp7
+ %tmp10 = load float, float addrspace(2)* %tmp8, align 4
+ %tmp11 = fadd float %tmp5, %tmp10
+ %tmp12 = add i32 %x, 256
+ %tmp13 = sext i32 %tmp12 to i64
+ %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp
+ %tmp16 = load float, float addrspace(2)* %tmp14, align 4
+ %tmp17 = fadd float %tmp11, %tmp16
+ %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp7
+ %tmp20 = load float, float addrspace(2)* %tmp18, align 4
+ %tmp21 = fadd float %tmp17, %tmp20
+ store float %tmp21, float addrspace(1)* %output, align 4
+ ret void
+@lds_array = internal addrspace(3) global [4096 x [4 x float]] undef, align 4
+; DS instructions have a larger immediate offset, so make sure these are OK.
+; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 255
+; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16128
+; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16383
+define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
+ %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y
+ %tmp4 = load float, float addrspace(3)* %tmp2, align 4
+ %tmp5 = fadd float %tmp4, 0.000000e+00
+ %tmp6 = add i32 %y, 255
+ %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %tmp6
+ %tmp10 = load float, float addrspace(3)* %tmp8, align 4
+ %tmp11 = fadd float %tmp5, %tmp10
+ %tmp12 = add i32 %x, 4032
+ %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %y
+ %tmp16 = load float, float addrspace(3)* %tmp14, align 4
+ %tmp17 = fadd float %tmp11, %tmp16
+ %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %tmp6
+ %tmp20 = load float, float addrspace(3)* %tmp18, align 4
+ %tmp21 = fadd float %tmp17, %tmp20
+ store float %tmp21, float addrspace(1)* %output, align 4
+ ret void
diff --git a/test/Transforms/Sink/convergent.ll b/test/Transforms/Sink/convergent.ll
new file mode 100644
index 0000000..49207db
--- /dev/null
+++ b/test/Transforms/Sink/convergent.ll
@@ -0,0 +1,24 @@
+; RUN: opt -sink -S < %s | FileCheck %s
+; Verify that IR sinking does not move convergent operations to
+; blocks that are not control equivalent.
+; CHECK: define i32 @foo
+; CHECK: entry
+; CHECK-NEXT: call i32 @bar
+; CHECK-NEXT: br i1 %arg
+define i32 @foo(i1 %arg) {
+ %c = call i32 @bar() readonly convergent
+ br i1 %arg, label %then, label %end
+ ret i32 %c
+ ret i32 0
+declare i32 @bar() readonly convergent
diff --git a/test/tools/dsymutil/Inputs/frame-dw2.ll b/test/tools/dsymutil/Inputs/frame-dw2.ll
new file mode 100644
index 0000000..7ffc933
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/frame-dw2.ll
@@ -0,0 +1,71 @@
+; Generated from frame.c on Darwin with '-arch i386 -g -emit-llvm'
+; ModuleID = 'frame.c'
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.11.0"
+; Function Attrs: nounwind ssp
+define i32 @bar(i32 %b) #0 {
+ %b.addr = alloca i32, align 4
+ %var = alloca i32, align 4
+ store i32 %b, i32* %b.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !13, metadata !14), !dbg !15
+ call void @llvm.dbg.declare(metadata i32* %var, metadata !16, metadata !14), !dbg !17
+ %0 = load i32, i32* %b.addr, align 4, !dbg !18
+ %add = add nsw i32 %0, 1, !dbg !19
+ store i32 %add, i32* %var, align 4, !dbg !17
+ %call = call i32 @foo(i32* %var), !dbg !20
+ ret i32 %call, !dbg !21
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+declare i32 @foo(i32*) #2
+; Function Attrs: nounwind ssp
+define i32 @baz(i32 %b) #0 {
+ %b.addr = alloca i32, align 4
+ store i32 %b, i32* %b.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !22, metadata !14), !dbg !23
+ %0 = load i32, i32* %b.addr, align 4, !dbg !24
+ %call = call i32 @bar(i32 %0), !dbg !25
+ ret i32 %call, !dbg !26
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cx16,+sse,+sse2,+sse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cx16,+sse,+sse2,+sse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10, !11}
+!llvm.ident = !{!12}
+!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "frame.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4, !8}
+!4 = !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @bar, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !DISubprogram(name: "baz", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @baz, variables: !2)
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{i32 1, !"PIC Level", i32 2}
+!12 = !{!"clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)"}
+!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
+!14 = !DIExpression()
+!15 = !DILocation(line: 3, column: 13, scope: !4)
+!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "var", scope: !4, file: !1, line: 4, type: !7)
+!17 = !DILocation(line: 4, column: 6, scope: !4)
+!18 = !DILocation(line: 4, column: 12, scope: !4)
+!19 = !DILocation(line: 4, column: 14, scope: !4)
+!20 = !DILocation(line: 5, column: 9, scope: !4)
+!21 = !DILocation(line: 5, column: 2, scope: !4)
+!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !8, file: !1, line: 8, type: !7)
+!23 = !DILocation(line: 8, column: 13, scope: !8)
+!24 = !DILocation(line: 9, column: 13, scope: !8)
+!25 = !DILocation(line: 9, column: 9, scope: !8)
+!26 = !DILocation(line: 9, column: 2, scope: !8)
diff --git a/test/tools/dsymutil/Inputs/frame-dw4.ll b/test/tools/dsymutil/Inputs/frame-dw4.ll
new file mode 100644
index 0000000..c8674b1
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/frame-dw4.ll
@@ -0,0 +1,71 @@
+; Generated from frame.c on Darwin with '-arch i386 -gdwarf-4 -emit-llvm'
+; ModuleID = 'frame.c'
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.11.0"
+; Function Attrs: nounwind ssp
+define i32 @bar(i32 %b) #0 {
+ %b.addr = alloca i32, align 4
+ %var = alloca i32, align 4
+ store i32 %b, i32* %b.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !13, metadata !14), !dbg !15
+ call void @llvm.dbg.declare(metadata i32* %var, metadata !16, metadata !14), !dbg !17
+ %0 = load i32, i32* %b.addr, align 4, !dbg !18
+ %add = add nsw i32 %0, 1, !dbg !19
+ store i32 %add, i32* %var, align 4, !dbg !17
+ %call = call i32 @foo(i32* %var), !dbg !20
+ ret i32 %call, !dbg !21
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+declare i32 @foo(i32*) #2
+; Function Attrs: nounwind ssp
+define i32 @baz(i32 %b) #0 {
+ %b.addr = alloca i32, align 4
+ store i32 %b, i32* %b.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !22, metadata !14), !dbg !23
+ %0 = load i32, i32* %b.addr, align 4, !dbg !24
+ %call = call i32 @bar(i32 %0), !dbg !25
+ ret i32 %call, !dbg !26
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cx16,+sse,+sse2,+sse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cx16,+sse,+sse2,+sse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10, !11}
+!llvm.ident = !{!12}
+!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "frame.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4, !8}
+!4 = !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @bar, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !DISubprogram(name: "baz", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @baz, variables: !2)
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{i32 1, !"PIC Level", i32 2}
+!12 = !{!"clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)"}
+!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
+!14 = !DIExpression()
+!15 = !DILocation(line: 3, column: 13, scope: !4)
+!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "var", scope: !4, file: !1, line: 4, type: !7)
+!17 = !DILocation(line: 4, column: 6, scope: !4)
+!18 = !DILocation(line: 4, column: 12, scope: !4)
+!19 = !DILocation(line: 4, column: 14, scope: !4)
+!20 = !DILocation(line: 5, column: 9, scope: !4)
+!21 = !DILocation(line: 5, column: 2, scope: !4)
+!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !8, file: !1, line: 8, type: !7)
+!23 = !DILocation(line: 8, column: 13, scope: !8)
+!24 = !DILocation(line: 9, column: 13, scope: !8)
+!25 = !DILocation(line: 9, column: 9, scope: !8)
+!26 = !DILocation(line: 9, column: 2, scope: !8)
diff --git a/test/tools/dsymutil/Inputs/frame.c b/test/tools/dsymutil/Inputs/frame.c
new file mode 100644
index 0000000..9ca082d
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/frame.c
@@ -0,0 +1,10 @@
+int foo(int *f);
+int bar(int b) {
+ int var = b + 1;
+ return foo(&var);
+int baz(int b) {
+ return bar(b);
diff --git a/test/tools/dsymutil/X86/basic-linking-x86.test b/test/tools/dsymutil/X86/basic-linking-x86.test
index 1059e23..19b4e3b 100644
--- a/test/tools/dsymutil/X86/basic-linking-x86.test
+++ b/test/tools/dsymutil/X86/basic-linking-x86.test
@@ -6,6 +6,8 @@ RUN: llvm-dsymutil -o %t2 -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_6
RUN: llvm-dwarfdump %t2 | FileCheck %s
RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC
RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE
+RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dsymutil -y -o - - | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC
+RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dsymutil -o - -y - | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE
CHECK: file format Mach-O 64-bit x86-64
@@ -118,7 +120,7 @@ ARCHIVE: DW_AT_location [DW_FORM_block1] (<0x09> 03 08 10 00 00 01 00 00 00 )
CHECK: DW_TAG_volatile_type [10]
CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x0041 => {0x00000167})
CHECK: DW_TAG_base_type [4]
-CHACK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000060] = "int")
+CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000060] = "int")
CHECK: DW_TAG_subprogram [2] *
CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000009b] = "bar")
CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x0041 => {0x00000167})
diff --git a/test/tools/dsymutil/X86/basic-lto-linking-x86.test b/test/tools/dsymutil/X86/basic-lto-linking-x86.test
index 22b6e08..395234e 100644
--- a/test/tools/dsymutil/X86/basic-lto-linking-x86.test
+++ b/test/tools/dsymutil/X86/basic-lto-linking-x86.test
@@ -1,5 +1,6 @@
RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-lto.macho.x86_64 | llvm-dwarfdump - | FileCheck %s
+RUN: llvm-dsymutil -oso-prepend-path=%p/.. -dump-debug-map %p/../Inputs/basic-lto.macho.x86_64 | llvm-dsymutil -o - -y - | llvm-dwarfdump - | FileCheck %s
CHECK: file format Mach-O 64-bit x86-64
diff --git a/test/tools/dsymutil/X86/frame-1.test b/test/tools/dsymutil/X86/frame-1.test
new file mode 100644
index 0000000..7852e68
--- /dev/null
+++ b/test/tools/dsymutil/X86/frame-1.test
@@ -0,0 +1,32 @@
+# REQUIRES: object-emission
+# RUN: rm -rf %t
+# RUN: mkdir -p %t
+# RUN: llc -filetype=obj %p/../Inputs/frame-dw2.ll -o %t/frame-dw2.o
+# RUN: llvm-dsymutil -oso-prepend-path=%t -y %s -o - | llvm-dwarfdump -debug-dump=frames - | FileCheck %s
+# This test is meant to verify that identical CIEs get reused within
+# the same file and also across files. For this to happen, we
+# link the same file twice using this made-up debug map:
+triple: 'i386-unknown-unknown-macho'
+ - filename: frame-dw2.o
+ symbols:
+ - { sym: _bar, objAddr: 0x0, binAddr: 0x1000, size: 0x12 }
+ - { sym: _baz, objAddr: 0x0, binAddr: 0x2000, size: 0x12 }
+ - filename: frame-dw2.o
+ symbols:
+ - { sym: _baz, objAddr: 0x0, binAddr: 0x3000, size: 0x12 }
+# CHECK: .debug_frame contents:
+# CHECK: 00000000 {{[0-9a-f]*}} ffffffff CIE
+# CHECK: FDE cie=00000000 pc=00001000...00001
+# CHECK: FDE cie=00000000 pc=00002000...00002
+# CHECK: FDE cie=00000000 pc=00003000...00003
diff --git a/test/tools/dsymutil/X86/frame-2.test b/test/tools/dsymutil/X86/frame-2.test
new file mode 100644
index 0000000..168e342
--- /dev/null
+++ b/test/tools/dsymutil/X86/frame-2.test
@@ -0,0 +1,47 @@
+# REQUIRES: object-emission
+# RUN: rm -rf %t
+# RUN: mkdir -p %t
+# RUN: llc -filetype=obj %p/../Inputs/frame-dw2.ll -o %t/frame-dw2.o
+# RUN: llc -filetype=obj %p/../Inputs/frame-dw4.ll -o %t/frame-dw4.o
+# RUN: llvm-dsymutil -oso-prepend-path=%t -y %s -o - | llvm-dwarfdump -debug-dump=frames - | FileCheck %s
+# Check the handling of multiple different CIEs. To have CIEs that
+# appear to be different, use a dwarf2 version of the file along with
+# a dwarf 4 version. The CIE header version (and layout) will be different.
+# FIXME: this test also checks that we didn't reuse the first CIE when it
+# appears again. This is a behavior we inherited from dsymutil-classic
+# but this should be fixed (see comment in patchFrameInfoForObject())
+triple: 'i386-unknown-unknown-macho'
+ - filename: frame-dw2.o
+ symbols:
+ - { sym: _bar, objAddr: 0x0, binAddr: 0x1000, size: 0x12 }
+ - { sym: _baz, objAddr: 0x0, binAddr: 0x2000, size: 0x12 }
+ - filename: frame-dw4.o
+ symbols:
+ - { sym: _baz, objAddr: 0x0, binAddr: 0x3000, size: 0x12 }
+ - filename: frame-dw2.o
+ symbols:
+ - { sym: _bar, objAddr: 0x0, binAddr: 0x4000, size: 0x12 }
+# CHECK: .debug_frame contents:
+# CHECK: 00000000 {{[0-9a-f]*}} ffffffff CIE
+# CHECK-NEXT: Version:{{.*}}1
+# CHECK: FDE cie=00000000 pc=00001000...00001
+# CHECK: FDE cie=00000000 pc=00002000...00002
+# CHECK: [[CIEDW4:[0-9a-f]*]] 00000010 ffffffff CIE
+# CHECK-NEXT: Version:{{.*}}4
+# CHECK: FDE cie=[[CIEDW4]] pc=00003000...00003
+# CHECK: [[CIEDW2:[0-9a-f]*]] {{[0-9a-f]*}} ffffffff CIE
+# CHECK-NEXT: Version:{{.*}}1
+# CHECK: FDE cie=[[CIEDW2]] pc=00004000...00004
diff --git a/test/tools/dsymutil/debug-map-parsing.test b/test/tools/dsymutil/debug-map-parsing.test
index 3422316..5091dfb 100644
--- a/test/tools/dsymutil/debug-map-parsing.test
+++ b/test/tools/dsymutil/debug-map-parsing.test
@@ -1,40 +1,42 @@
-RUN: llvm-dsymutil -v -parse-only -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s
-RUN: llvm-dsymutil -v -parse-only -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO
-RUN: llvm-dsymutil -v -parse-only -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE
-RUN: llvm-dsymutil -v -parse-only %p/Inputs/basic.macho.x86_64 2>&1 | FileCheck %s --check-prefix=NOT-FOUND
-RUN: not llvm-dsymutil -v -parse-only %p/Inputs/inexistant 2>&1 | FileCheck %s --check-prefix=NO-EXECUTABLE
+RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s
+RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO
+RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE
+RUN: llvm-dsymutil -v -dump-debug-map %p/Inputs/basic.macho.x86_64 2>&1 | FileCheck %s --check-prefix=NOT-FOUND
+RUN: not llvm-dsymutil -v -dump-debug-map %p/Inputs/inexistant 2>&1 | FileCheck %s --check-prefix=NO-EXECUTABLE
Check that We can parse the debug map of the basic executable.
CHECK-NOT: error
-CHECK: DEBUG MAP: x86_64-unknown-unknown-macho
-CHECK: /Inputs/basic1.macho.x86_64.o:
-CHECK: 0000000000000000 => 0000000100000ea0+0x24 _main
-CHECK: /Inputs/basic2.macho.x86_64.o:
-CHECK: 0000000000000310 => 0000000100001000+0x0 _baz
-CHECK: 0000000000000020 => 0000000100000ed0+0x50 _foo
-CHECK: 0000000000000070 => 0000000100000f20+0x17 _inc
-CHECK: 0000000000000560 => 0000000100001008+0x0 _private_int
-CHECK: /Inputs/basic3.macho.x86_64.o:
-CHECK: 0000000000000020 => 0000000100000f40+0x50 _bar
-CHECK: 0000000000000070 => 0000000100000f90+0x19 _inc
-CHECK: 0000000000000004 => 0000000100001004+0x0 _val
+CHECK: ---
+CHECK: triple: 'x86_64-unknown-unknown-macho'
+CHECK: filename:{{.*}}/Inputs/basic1.macho.x86_64.o
+CHECK-DAG: sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000EA0, size: 0x00000024
+CHECK: filename{{.*}}/Inputs/basic2.macho.x86_64.o
+CHECK-DAG: sym: _baz, objAddr: 0x0000000000000310, binAddr: 0x0000000100001000, size: 0x00000000
+CHECK-DAG: sym: _foo, objAddr: 0x0000000000000020, binAddr: 0x0000000100000ED0, size: 0x00000050
+CHECK-DAG: sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F20, size: 0x00000017
+CHECK-DAG: sym: _private_int, objAddr: 0x0000000000000560, binAddr: 0x0000000100001008, size: 0x00000000
+CHECK: filename{{.*}}/Inputs/basic3.macho.x86_64.o
+CHECK-DAG: sym: _bar, objAddr: 0x0000000000000020, binAddr: 0x0000000100000F40, size: 0x00000050
+CHECK-DAG: sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F90, size: 0x00000019
+CHECK-DAG: sym: _val, objAddr: 0x0000000000000004, binAddr: 0x0000000100001004, size: 0x00000000
+CHECK: ...
Check that we can parse the debug-map of the basic-lto executable
-CHECK-LTO: DEBUG MAP: x86_64-unknown-unknown-macho
-CHECK-LTO: /Inputs/basic-lto.macho.x86_64.o:
-CHECK-LTO: 0000000000000050 => 0000000100000f90+0x24 _bar
-CHECK-LTO: 0000000000000658 => 0000000100001000+0x0 _baz
-CHECK-LTO: 0000000000000010 => 0000000100000f50+0x40 _foo
-CHECK-LTO: 0000000000000000 => 0000000100000f40+0x10 _main
-CHECK-LTO: 00000000000008e8 => 0000000100001008+0x0 _private_int
-CHECK-LTO: 00000000000008ec => 0000000100001004+0x0 _val
+CHECK-LTO: triple: 'x86_64-unknown-unknown-macho'
+CHECK-LTO: /Inputs/basic-lto.macho.x86_64.o
+CHECK-LTO-DAG: sym: _bar, objAddr: 0x0000000000000050, binAddr: 0x0000000100000F90, size: 0x00000024
+CHECK-LTO-DAG: sym: _baz, objAddr: 0x0000000000000658, binAddr: 0x0000000100001000, size: 0x00000000
+CHECK-LTO-DAG: sym: _foo, objAddr: 0x0000000000000010, binAddr: 0x0000000100000F50, size: 0x00000040
+CHECK-LTO-DAG: sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000F40, size: 0x00000010
+CHECK-LTO-DAG: sym: _private_int, objAddr: 0x00000000000008E8, binAddr: 0x0000000100001008, size: 0x00000000
+CHECK-LTO-DAG: sym: _val, objAddr: 0x00000000000008EC, binAddr: 0x0000000100001004, size: 0x00000000
Check thet we correctly handle debug maps with archive members (including only
opening the archive once if mulitple of its members are used).
@@ -48,20 +50,20 @@ CHECK-ARCHIVE-NEXT: opened new archive {{.*}}/libbasic.a'
CHECK-ARCHIVE-NEXT: found member in current archive.
CHECK-ARCHIVE-NEXT: trying to open {{.*}}/libbasic.a(basic3.macho.x86_64.o)'
CHECK-ARCHIVE-NEXT: found member in current archive.
-CHECK-ARCHIVE: DEBUG MAP: x86_64-unknown-unknown-macho
-CHECK-ARCHIVE: object addr => executable addr symbol name
-CHECK-ARCHIVE: /Inputs/basic1.macho.x86_64.o:
-CHECK-ARCHIVE: 0000000000000000 => 0000000100000ea0+0x24 _main
-CHECK-ARCHIVE: /Inputs/./libbasic.a(basic2.macho.x86_64.o):
-CHECK-ARCHIVE: 0000000000000310 => 0000000100001000+0x0 _baz
-CHECK-ARCHIVE: 0000000000000020 => 0000000100000ed0+0x50 _foo
-CHECK-ARCHIVE: 0000000000000070 => 0000000100000f20+0x17 _inc
-CHECK-ARCHIVE: 0000000000000560 => 0000000100001004+0x0 _private_int
-CHECK-ARCHIVE: /Inputs/./libbasic.a(basic3.macho.x86_64.o):
-CHECK-ARCHIVE: 0000000000000020 => 0000000100000f40+0x50 _bar
-CHECK-ARCHIVE: 0000000000000070 => 0000000100000f90+0x19 _inc
-CHECK-ARCHIVE: 0000000000000004 => 0000000100001008+0x0 _val
+CHECK-ARCHIVE: triple: 'x86_64-unknown-unknown-macho'
+CHECK-ARCHIVE: /Inputs/basic1.macho.x86_64.o
+CHECK-ARCHIVE-DAG: sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000EA0, size: 0x00000024
+CHECK-ARCHIVE: /Inputs/./libbasic.a(basic2.macho.x86_64.o)
+CHECK-ARCHIVE-DAG: sym: _baz, objAddr: 0x0000000000000310, binAddr: 0x0000000100001000, size: 0x00000000
+CHECK-ARCHIVE-DAG: sym: _foo, objAddr: 0x0000000000000020, binAddr: 0x0000000100000ED0, size: 0x00000050
+CHECK-ARCHIVE-DAG: sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F20, size: 0x00000017
+CHECK-ARCHIVE-DAG: sym: _private_int, objAddr: 0x0000000000000560, binAddr: 0x0000000100001004, size: 0x00000000
+CHECK-ARCHIVE: /Inputs/./libbasic.a(basic3.macho.x86_64.o)
+CHECK-ARCHIVE-DAG: sym: _bar, objAddr: 0x0000000000000020, binAddr: 0x0000000100000F40, size: 0x00000050
+CHECK-ARCHIVE-DAG: sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F90, size: 0x00000019
+CHECK-ARCHIVE-DAG: sym: _val, objAddr: 0x0000000000000004, binAddr: 0x0000000100001008, size: 0x00000000
Check that we warn about missing object files (this presumes that the files aren't
present in the machine's /Inputs/ folder, which should be a pretty safe bet).
@@ -69,11 +71,11 @@ present in the machine's /Inputs/ folder, which should be a pretty safe bet).
NOT-FOUND: cannot open{{.*}}"/Inputs/basic1.macho.x86_64.o": {{[Nn]o}} such file
NOT-FOUND: cannot open{{.*}}"/Inputs/basic2.macho.x86_64.o": {{[Nn]o}} such file
NOT-FOUND: cannot open{{.*}}"/Inputs/basic3.macho.x86_64.o": {{[Nn]o}} such file
-NOT-FOUND-NEXT: object addr => executable addr symbol name
+NOT-FOUND-NEXT: triple: 'x86_64-unknown-unknown-macho'
Check that we correctly error out on invalid executatble.
NO-EXECUTABLE: cannot parse{{.*}}/inexistant": {{[Nn]o}} such file
diff --git a/test/tools/dsymutil/yaml-object-address-rewrite.test b/test/tools/dsymutil/yaml-object-address-rewrite.test
new file mode 100644
index 0000000..dcb39be
--- /dev/null
+++ b/test/tools/dsymutil/yaml-object-address-rewrite.test
@@ -0,0 +1,44 @@
+# RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p -y %s | FileCheck %s
+# The YAML debug map below is the one from basic-archive.macho.x86_64 with
+# the object addresses set to zero. Check that the YAML import is able to
+# rewrite these addresses to the right values.
+# CHECK: ---
+# CHECK-NEXT: triple:{{.*}}'x86_64-unknown-unknown-macho'
+# CHECK-NEXT: objects:
+# CHECK-NEXT: filename:{{.*}}/Inputs/basic1.macho.x86_64.o
+# CHECK-NEXT: symbols:
+# CHECK-NEXT: sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000EA0, size: 0x00000024
+# CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic2.macho.x86_64.o)'
+# CHECK-NEXT: symbols:
+# CHECK-DAG: sym: _foo, objAddr: 0x0000000000000020, binAddr: 0x0000000100000ED0, size: 0x00000050
+# CHECK-DAG: sym: _private_int, objAddr: 0x0000000000000560, binAddr: 0x0000000100001004, size: 0x00000000
+# CHECK-DAG: sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F20, size: 0x00000017
+# CHECK-DAG: sym: _baz, objAddr: 0x0000000000000310, binAddr: 0x0000000100001000, size: 0x00000000
+# CHECK-NOT: { sym:
+# CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic3.macho.x86_64.o)'
+# CHECK-NEXT: symbols:
+# CHECK-DAG: sym: _val, objAddr: 0x0000000000000004, binAddr: 0x0000000100001008, size: 0x00000000
+# CHECK-DAG: sym: _bar, objAddr: 0x0000000000000020, binAddr: 0x0000000100000F40, size: 0x00000050
+# CHECK-DAG: sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F90, size: 0x00000019
+# CHECK-NOT: { sym:
+# CHECK-NEXT: ...
+triple: 'x86_64-unknown-unknown-macho'
+ - filename: /Inputs/basic1.macho.x86_64.o
+ symbols:
+ - { sym: _main, objAddr: 0x0, binAddr: 0x0000000100000EA0, size: 0x00000024 }
+ - filename: /Inputs/./libbasic.a(basic2.macho.x86_64.o)
+ symbols:
+ - { sym: _foo, objAddr: 0x0, binAddr: 0x0000000100000ED0, size: 0x00000050 }
+ - { sym: _private_int, objAddr: 0x0, binAddr: 0x0000000100001004, size: 0x00000000 }
+ - { sym: _inc, objAddr: 0x0, binAddr: 0x0000000100000F20, size: 0x00000017 }
+ - { sym: _baz, objAddr: 0x0, binAddr: 0x0000000100001000, size: 0x00000000 }
+ - filename: /Inputs/./libbasic.a(basic3.macho.x86_64.o)
+ symbols:
+ - { sym: _val, objAddr: 0x0, binAddr: 0x0000000100001008, size: 0x00000000 }
+ - { sym: _bar, objAddr: 0x0, binAddr: 0x0000000100000F40, size: 0x00000050 }
+ - { sym: _inc, objAddr: 0x0, binAddr: 0x0000000100000F90, size: 0x00000019 }
diff --git a/test/tools/llvm-objdump/invalid-input.test b/test/tools/llvm-objdump/invalid-input.test
new file mode 100644
index 0000000..20a901d
--- /dev/null
+++ b/test/tools/llvm-objdump/invalid-input.test
@@ -0,0 +1,6 @@
+RUN: not llvm-objdump -t %p/missing-file 2>&1 | FileCheck %s -check-prefix=NO_SUCH_FILE
+# Don't check the OS-dependent message "No such file or directory".
+NO_SUCH_FILE: '{{.*}}missing-file':
+RUN: not llvm-objdump -t %s 2>&1 | FileCheck %s -check-prefix=UNKNOWN_FILE_TYPE
+UNKNOWN_FILE_TYPE: '{{.*}}invalid-input.test': The file was not recognized as a valid object file
diff --git a/test/tools/llvm-readobj/elf-dtflags.test b/test/tools/llvm-readobj/elf-dtflags.test
index 0ed1c7a..4e6c90d 100644
--- a/test/tools/llvm-readobj/elf-dtflags.test
+++ b/test/tools/llvm-readobj/elf-dtflags.test
@@ -1,4 +1,8 @@
// Test that llvm-readobj dumps DF_XXX and DF_1_XXX flags correctly.
+// The input was generated using the following:
+// $ clang -Wl,-z,origin -Wl,-z,now example.c
+// $ cat example.c
+// int main(void) { return (0); }
RUN: llvm-readobj -dynamic-table %p/Inputs/dtflags.elf-x86-64 | FileCheck %s
diff --git a/test/tools/llvm-readobj/sections-ext.test b/test/tools/llvm-readobj/sections-ext.test
index 4024878..6b4a674 100644
--- a/test/tools/llvm-readobj/sections-ext.test
+++ b/test/tools/llvm-readobj/sections-ext.test
@@ -110,7 +110,7 @@ ELF-NEXT: Relocations [
ELF-NEXT: Symbols [
ELF-NEXT: Symbol {
-ELF-NEXT: Name: .text (0)
+ELF-NEXT: Name: (0)
ELF-NEXT: Value: 0x0
ELF-NEXT: Size: 0
ELF-NEXT: Binding: Local (0x0)
diff --git a/tools/Makefile b/tools/Makefile
index d853486..a47710f 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -19,9 +19,8 @@ else
-# Build LLD and LLDB if present. Note LLDB must be built last as it depends on
+# Build LLDB if present. Note LLDB must be built last as it depends on
# the wider LLVM infrastructure (including Clang).
# NOTE: The tools are organized into five groups of four consisting of one
diff --git a/tools/bugpoint/CrashDebugger.cpp b/tools/bugpoint/CrashDebugger.cpp
index 6f41d30..e2aaf6b 100644
--- a/tools/bugpoint/CrashDebugger.cpp
+++ b/tools/bugpoint/CrashDebugger.cpp
@@ -385,8 +385,7 @@ bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) {
std::vector<std::pair<std::string, std::string> > BlockInfo;
for (BasicBlock *BB : Blocks)
- BlockInfo.push_back(std::make_pair(BB->getParent()->getName(),
- BB->getName()));
+ BlockInfo.emplace_back(BB->getParent()->getName(), BB->getName());
// Now run the CFG simplify pass on the function...
std::vector<std::string> Passes;
diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp
index 53631d2..fad1636 100644
--- a/tools/bugpoint/Miscompilation.cpp
+++ b/tools/bugpoint/Miscompilation.cpp
@@ -386,10 +386,8 @@ static bool ExtractLoops(BugDriver &BD,
// that masked the error. Stop loop extraction now.
std::vector<std::pair<std::string, FunctionType*> > MisCompFunctions;
- for (unsigned i = 0, e = MiscompiledFunctions.size(); i != e; ++i) {
- Function *F = MiscompiledFunctions[i];
- MisCompFunctions.push_back(std::make_pair(F->getName(),
- F->getFunctionType()));
+ for (Function *F : MiscompiledFunctions) {
+ MisCompFunctions.emplace_back(F->getName(), F->getFunctionType());
if (Linker::LinkModules(ToNotOptimize, ToOptimizeLoopExtracted))
@@ -414,8 +412,7 @@ static bool ExtractLoops(BugDriver &BD,
for (Module::iterator I = ToOptimizeLoopExtracted->begin(),
E = ToOptimizeLoopExtracted->end(); I != E; ++I)
if (!I->isDeclaration())
- MisCompFunctions.push_back(std::make_pair(I->getName(),
- I->getFunctionType()));
+ MisCompFunctions.emplace_back(I->getName(), I->getFunctionType());
// Okay, great! Now we know that we extracted a loop and that loop
// extraction both didn't break the program, and didn't mask the problem.
@@ -596,8 +593,7 @@ static bool ExtractBlocks(BugDriver &BD,
for (Module::iterator I = Extracted->begin(), E = Extracted->end();
I != E; ++I)
if (!I->isDeclaration())
- MisCompFunctions.push_back(std::make_pair(I->getName(),
- I->getFunctionType()));
+ MisCompFunctions.emplace_back(I->getName(), I->getFunctionType());
if (Linker::LinkModules(ProgClone, Extracted.get()))
diff --git a/tools/dsymutil/CMakeLists.txt b/tools/dsymutil/CMakeLists.txt
index 59b37a9..88f9f1f 100644
--- a/tools/dsymutil/CMakeLists.txt
+++ b/tools/dsymutil/CMakeLists.txt
@@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS
+ Target
diff --git a/tools/dsymutil/DebugMap.cpp b/tools/dsymutil/DebugMap.cpp
index 9fa3f78..1a81848 100644
--- a/tools/dsymutil/DebugMap.cpp
+++ b/tools/dsymutil/DebugMap.cpp
@@ -7,6 +7,7 @@
#include "DebugMap.h"
+#include "BinaryHolder.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/DataTypes.h"
@@ -46,8 +47,9 @@ void DebugMapObject::print(raw_ostream &OS) const {
[](const Entry &LHS, const Entry &RHS) { return LHS.first < RHS.first; });
for (const auto &Sym : Entries) {
OS << format("\t%016" PRIx64 " => %016" PRIx64 "+0x%x\t%s\n",
- Sym.second.ObjectAddress, Sym.second.BinaryAddress,
- Sym.second.Size,;
+ uint64_t(Sym.second.ObjectAddress),
+ uint64_t(Sym.second.BinaryAddress), uint32_t(Sym.second.Size),
OS << '\n';
@@ -78,15 +80,160 @@ DebugMapObject::lookupObjectAddress(uint64_t Address) const {
void DebugMap::print(raw_ostream &OS) const {
- OS << "DEBUG MAP: " << BinaryTriple.getTriple()
- << "\n\tobject addr => executable addr\tsymbol name\n";
- for (const auto &Obj : objects())
- Obj->print(OS);
- OS << "END DEBUG MAP\n";
+ yaml::Output yout(OS, /* Ctxt = */ nullptr, /* WrapColumn = */ 0);
+ yout << const_cast<DebugMap &>(*this);
#ifndef NDEBUG
void DebugMap::dump() const { print(errs()); }
+namespace {
+struct YAMLContext {
+ StringRef PrependPath;
+ Triple BinaryTriple;
+DebugMap::parseYAMLDebugMap(StringRef InputFile, StringRef PrependPath,
+ bool Verbose) {
+ auto ErrOrFile = MemoryBuffer::getFileOrSTDIN(InputFile);
+ if (auto Err = ErrOrFile.getError())
+ return Err;
+ YAMLContext Ctxt;
+ Ctxt.PrependPath = PrependPath;
+ std::unique_ptr<DebugMap> Res;
+ yaml::Input yin((*ErrOrFile)->getBuffer(), &Ctxt);
+ yin >> Res;
+ if (auto EC = yin.error())
+ return EC;
+ return std::move(Res);
+namespace yaml {
+// Normalize/Denormalize between YAML and a DebugMapObject.
+struct MappingTraits<dsymutil::DebugMapObject>::YamlDMO {
+ YamlDMO(IO &io) {}
+ YamlDMO(IO &io, dsymutil::DebugMapObject &Obj);
+ dsymutil::DebugMapObject denormalize(IO &IO);
+ std::string Filename;
+ std::vector<dsymutil::DebugMapObject::YAMLSymbolMapping> Entries;
+void MappingTraits<std::pair<std::string, DebugMapObject::SymbolMapping>>::
+ mapping(IO &io, std::pair<std::string, DebugMapObject::SymbolMapping> &s) {
+ io.mapRequired("sym", s.first);
+ io.mapRequired("objAddr", s.second.ObjectAddress);
+ io.mapRequired("binAddr", s.second.BinaryAddress);
+ io.mapOptional("size", s.second.Size);
+void MappingTraits<dsymutil::DebugMapObject>::mapping(
+ IO &io, dsymutil::DebugMapObject &DMO) {
+ MappingNormalization<YamlDMO, dsymutil::DebugMapObject> Norm(io, DMO);
+ io.mapRequired("filename", Norm->Filename);
+ io.mapRequired("symbols", Norm->Entries);
+void ScalarTraits<Triple>::output(const Triple &val, void *,
+ llvm::raw_ostream &out) {
+ out << val.str();
+StringRef ScalarTraits<Triple>::input(StringRef scalar, void *, Triple &value) {
+ value = Triple(scalar);
+ return StringRef();
+ IO &io, std::vector<std::unique_ptr<dsymutil::DebugMapObject>> &seq) {
+ return seq.size();
+dsymutil::DebugMapObject &
+ IO &, std::vector<std::unique_ptr<dsymutil::DebugMapObject>> &seq,
+ size_t index) {
+ if (index >= seq.size()) {
+ seq.resize(index + 1);
+ seq[index].reset(new dsymutil::DebugMapObject);
+ }
+ return *seq[index];
+void MappingTraits<dsymutil::DebugMap>::mapping(IO &io,
+ dsymutil::DebugMap &DM) {
+ io.mapRequired("triple", DM.BinaryTriple);
+ io.mapOptional("objects", DM.Objects);
+ if (void *Ctxt = io.getContext())
+ reinterpret_cast<YAMLContext *>(Ctxt)->BinaryTriple = DM.BinaryTriple;
+void MappingTraits<std::unique_ptr<dsymutil::DebugMap>>::mapping(
+ IO &io, std::unique_ptr<dsymutil::DebugMap> &DM) {
+ if (!DM)
+ DM.reset(new DebugMap());
+ io.mapRequired("triple", DM->BinaryTriple);
+ io.mapOptional("objects", DM->Objects);
+ if (void *Ctxt = io.getContext())
+ reinterpret_cast<YAMLContext *>(Ctxt)->BinaryTriple = DM->BinaryTriple;
+ IO &io, dsymutil::DebugMapObject &Obj) {
+ Filename = Obj.Filename;
+ Entries.reserve(Obj.Symbols.size());
+ for (auto &Entry : Obj.Symbols)
+ Entries.push_back(std::make_pair(Entry.getKey(), Entry.getValue()));
+MappingTraits<dsymutil::DebugMapObject>::YamlDMO::denormalize(IO &IO) {
+ BinaryHolder BinHolder(/* Verbose =*/false);
+ const auto &Ctxt = *reinterpret_cast<YAMLContext *>(IO.getContext());
+ SmallString<80> Path(Ctxt.PrependPath);
+ StringMap<uint64_t> SymbolAddresses;
+ sys::path::append(Path, Filename);
+ auto ErrOrObjectFile = BinHolder.GetObjectFile(Path);
+ if (auto EC = ErrOrObjectFile.getError()) {
+ llvm::errs() << "warning: Unable to open " << Path << " " << EC.message()
+ << '\n';
+ } else {
+ // Rewrite the object file symbol addresses in the debug map. The
+ // YAML input is mainly used to test llvm-dsymutil without
+ // requiring binaries checked-in. If we generate the object files
+ // during the test, we can't hardcode the symbols addresses, so
+ // look them up here and rewrite them.
+ for (const auto &Sym : ErrOrObjectFile->symbols()) {
+ StringRef Name;
+ uint64_t Address;
+ if (Sym.getName(Name) || Sym.getAddress(Address))
+ continue;
+ SymbolAddresses[Name] = Address;
+ }
+ }
+ dsymutil::DebugMapObject Res(Path);
+ for (auto &Entry : Entries) {
+ auto &Mapping = Entry.second;
+ uint64_t ObjAddress = Mapping.ObjectAddress;
+ auto AddressIt = SymbolAddresses.find(Entry.first);
+ if (AddressIt != SymbolAddresses.end())
+ ObjAddress = AddressIt->getValue();
+ Res.addSymbol(Entry.first, ObjAddress, Mapping.BinaryAddress, Mapping.Size);
+ }
+ return Res;
diff --git a/tools/dsymutil/DebugMap.h b/tools/dsymutil/DebugMap.h
index ee48b09..d0edbab 100644
--- a/tools/dsymutil/DebugMap.h
+++ b/tools/dsymutil/DebugMap.h
@@ -28,6 +28,8 @@
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/YAMLTraits.h"
#include <vector>
namespace llvm {
@@ -66,6 +68,12 @@ class DebugMap {
typedef std::vector<std::unique_ptr<DebugMapObject>> ObjectContainer;
ObjectContainer Objects;
+ /// For YAML IO support.
+ ///@{
+ friend yaml::MappingTraits<std::unique_ptr<DebugMap>>;
+ friend yaml::MappingTraits<DebugMap>;
+ DebugMap() = default;
+ ///@}
DebugMap(const Triple &BinaryTriple) : BinaryTriple(BinaryTriple) {}
@@ -90,6 +98,10 @@ public:
#ifndef NDEBUG
void dump() const;
+ /// Read a debug map for \a InputFile.
+ static ErrorOr<std::unique_ptr<DebugMap>>
+ parseYAMLDebugMap(StringRef InputFile, StringRef PrependPath, bool Verbose);
/// \brief The DebugMapObject represents one object file described by
@@ -99,12 +111,14 @@ public:
class DebugMapObject {
struct SymbolMapping {
- uint64_t ObjectAddress;
- uint64_t BinaryAddress;
- uint32_t Size;
+ yaml::Hex64 ObjectAddress;
+ yaml::Hex64 BinaryAddress;
+ yaml::Hex32 Size;
SymbolMapping(uint64_t ObjectAddress, uint64_t BinaryAddress, uint32_t Size)
: ObjectAddress(ObjectAddress), BinaryAddress(BinaryAddress),
Size(Size) {}
+ /// For YAML IO support
+ SymbolMapping() = default;
typedef StringMapEntry<SymbolMapping> DebugMapEntry;
@@ -141,6 +155,72 @@ private:
std::string Filename;
StringMap<SymbolMapping> Symbols;
DenseMap<uint64_t, DebugMapEntry *> AddressToMapping;
+ /// For YAMLIO support.
+ ///@{
+ typedef std::pair<std::string, SymbolMapping> YAMLSymbolMapping;
+ friend yaml::MappingTraits<dsymutil::DebugMapObject>;
+ friend yaml::SequenceTraits<std::vector<std::unique_ptr<DebugMapObject>>>;
+ friend yaml::SequenceTraits<std::vector<YAMLSymbolMapping>>;
+ DebugMapObject() = default;
+ DebugMapObject &operator=(DebugMapObject RHS) {
+ std::swap(Filename, RHS.Filename);
+ std::swap(Symbols, RHS.Symbols);
+ std::swap(AddressToMapping, RHS.AddressToMapping);
+ return *this;
+ }
+ DebugMapObject(DebugMapObject &&RHS) {
+ Filename = std::move(RHS.Filename);
+ Symbols = std::move(RHS.Symbols);
+ AddressToMapping = std::move(RHS.AddressToMapping);
+ }
+ ///@}
+namespace llvm {
+namespace yaml {
+using namespace llvm::dsymutil;
+template <>
+struct MappingTraits<std::pair<std::string, DebugMapObject::SymbolMapping>> {
+ static void mapping(IO &io,
+ std::pair<std::string, DebugMapObject::SymbolMapping> &s);
+ static const bool flow = true;
+template <> struct MappingTraits<dsymutil::DebugMapObject> {
+ struct YamlDMO;
+ static void mapping(IO &io, dsymutil::DebugMapObject &DMO);
+template <> struct ScalarTraits<Triple> {
+ static void output(const Triple &val, void *, llvm::raw_ostream &out);
+ static StringRef input(StringRef scalar, void *, Triple &value);
+ static bool mustQuote(StringRef) { return true; }
+template <>
+struct SequenceTraits<std::vector<std::unique_ptr<dsymutil::DebugMapObject>>> {
+ static size_t
+ size(IO &io, std::vector<std::unique_ptr<dsymutil::DebugMapObject>> &seq);
+ static dsymutil::DebugMapObject &
+ element(IO &, std::vector<std::unique_ptr<dsymutil::DebugMapObject>> &seq,
+ size_t index);
+template <> struct MappingTraits<dsymutil::DebugMap> {
+ static void mapping(IO &io, dsymutil::DebugMap &DM);
+template <> struct MappingTraits<std::unique_ptr<dsymutil::DebugMap>> {
+ static void mapping(IO &io, std::unique_ptr<dsymutil::DebugMap> &DM);
diff --git a/tools/dsymutil/DwarfLinker.cpp b/tools/dsymutil/DwarfLinker.cpp
index a6e62a8..7dc15b9 100644
--- a/tools/dsymutil/DwarfLinker.cpp
+++ b/tools/dsymutil/DwarfLinker.cpp
@@ -60,6 +60,36 @@ using HalfOpenIntervalMap =
typedef HalfOpenIntervalMap<uint64_t, int64_t> FunctionIntervals;
+// FIXME: Delete this structure once DIE::Values has a stable iterator we can
+// use instead.
+struct PatchLocation {
+ DIE *Die;
+ unsigned Index;
+ PatchLocation() : Die(nullptr), Index(0) {}
+ PatchLocation(DIE &Die, unsigned Index) : Die(&Die), Index(Index) {}
+ PatchLocation(DIE &Die)
+ : Die(&Die), Index(std::distance(Die.values_begin(), Die.values_end())) {}
+ void set(uint64_t New) const {
+ assert(Die);
+ assert((signed)Index <
+ std::distance(Die->values_begin(), Die->values_end()));
+ const auto &Old = Die->values_begin()[Index];
+ assert(Old.getType() == DIEValue::isInteger);
+ Die->setValue(Index,
+ DIEValue(Old.getAttribute(), Old.getForm(), DIEInteger(New)));
+ }
+ uint64_t get() const {
+ assert(Die);
+ assert((signed)Index <
+ std::distance(Die->values_begin(), Die->values_end()));
+ assert(Die->values_begin()[Index].getType() == DIEValue::isInteger);
+ return Die->values_begin()[Index].getDIEInteger().getValue();
+ }
/// \brief Stores all information relating to a compile unit, be it in
/// its original instance in the object file to its brand new cloned
/// and linked DIE tree.
@@ -76,7 +106,7 @@ public:
CompileUnit(DWARFUnit &OrigUnit, unsigned ID)
: OrigUnit(OrigUnit), ID(ID), LowPc(UINT64_MAX), HighPc(0), RangeAlloc(),
- Ranges(RangeAlloc), UnitRangeAttribute(nullptr) {
+ Ranges(RangeAlloc) {
@@ -106,13 +136,15 @@ public:
uint64_t getLowPc() const { return LowPc; }
uint64_t getHighPc() const { return HighPc; }
- DIEInteger *getUnitRangesAttribute() const { return UnitRangeAttribute; }
+ Optional<PatchLocation> getUnitRangesAttribute() const {
+ return UnitRangeAttribute;
+ }
const FunctionIntervals &getFunctionRanges() const { return Ranges; }
- const std::vector<DIEInteger *> &getRangesAttributes() const {
+ const std::vector<PatchLocation> &getRangesAttributes() const {
return RangeAttributes;
- const std::vector<std::pair<DIEInteger *, int64_t>> &
+ const std::vector<std::pair<PatchLocation, int64_t>> &
getLocationAttributes() const {
return LocationAttributes;
@@ -127,7 +159,7 @@ public:
/// RefUnit by \p Attr. The attribute should be fixed up later to
/// point to the absolute offset of \p Die in the debug_info section.
void noteForwardReference(DIE *Die, const CompileUnit *RefUnit,
- DIEInteger *Attr);
+ PatchLocation Attr);
/// \brief Apply all fixups recored by noteForwardReference().
void fixupForwardReferences();
@@ -138,11 +170,11 @@ public:
/// \brief Keep track of a DW_AT_range attribute that we will need to
/// patch up later.
- void noteRangeAttribute(const DIE &Die, DIEInteger *Attr);
+ void noteRangeAttribute(const DIE &Die, PatchLocation Attr);
/// \brief Keep track of a location attribute pointing to a location
/// list in the debug_loc section.
- void noteLocationAttribute(DIEInteger *Attr, int64_t PcOffset);
+ void noteLocationAttribute(PatchLocation Attr, int64_t PcOffset);
/// \brief Add a name accelerator entry for \p Die with \p Name
/// which is stored in the string table at \p Offset.
@@ -154,8 +186,8 @@ public:
void addTypeAccelerator(const DIE *Die, const char *Name, uint32_t Offset);
struct AccelInfo {
- StringRef Name; ///< Name of the entry.
- const DIE *Die; ///< DIE this entry describes.
+ StringRef Name; ///< Name of the entry.
+ const DIE *Die; ///< DIE this entry describes.
uint32_t NameOffset; ///< Offset of Name in the string pool.
bool SkipPubSection; ///< Emit this entry only in the apple_* sections.
@@ -186,7 +218,7 @@ private:
/// The offsets for the attributes in this array couldn't be set while
/// cloning because for cross-cu forward refences the target DIE's
/// offset isn't known you emit the reference attribute.
- std::vector<std::tuple<DIE *, const CompileUnit *, DIEInteger *>>
+ std::vector<std::tuple<DIE *, const CompileUnit *, PatchLocation>>
FunctionIntervals::Allocator RangeAlloc;
@@ -198,15 +230,15 @@ private:
/// \brief DW_AT_ranges attributes to patch after we have gathered
/// all the unit's function addresses.
/// @{
- std::vector<DIEInteger *> RangeAttributes;
- DIEInteger *UnitRangeAttribute;
+ std::vector<PatchLocation> RangeAttributes;
+ Optional<PatchLocation> UnitRangeAttribute;
/// @}
/// \brief Location attributes that need to be transfered from th
/// original debug_loc section to the liked one. They are stored
/// along with the PC offset that is to be applied to their
/// function's address.
- std::vector<std::pair<DIEInteger *, int64_t>> LocationAttributes;
+ std::vector<std::pair<PatchLocation, int64_t>> LocationAttributes;
/// \brief Accelerator entries for the unit, both for the pub*
/// sections and the apple* ones.
@@ -229,7 +261,7 @@ uint64_t CompileUnit::computeNextUnitOffset() {
/// \brief Keep track of a forward cross-cu reference from this unit
/// to \p Die that lives in \p RefUnit.
void CompileUnit::noteForwardReference(DIE *Die, const CompileUnit *RefUnit,
- DIEInteger *Attr) {
+ PatchLocation Attr) {
ForwardDIEReferences.emplace_back(Die, RefUnit, Attr);
@@ -238,9 +270,9 @@ void CompileUnit::fixupForwardReferences() {
for (const auto &Ref : ForwardDIEReferences) {
DIE *RefDie;
const CompileUnit *RefUnit;
- DIEInteger *Attr;
+ PatchLocation Attr;
std::tie(RefDie, RefUnit, Attr) = Ref;
- Attr->setValue(RefDie->getOffset() + RefUnit->getStartOffset());
+ Attr.set(RefDie->getOffset() + RefUnit->getStartOffset());
@@ -251,14 +283,14 @@ void CompileUnit::addFunctionRange(uint64_t FuncLowPc, uint64_t FuncHighPc,
this->HighPc = std::max(HighPc, FuncHighPc + PcOffset);
-void CompileUnit::noteRangeAttribute(const DIE &Die, DIEInteger *Attr) {
+void CompileUnit::noteRangeAttribute(const DIE &Die, PatchLocation Attr) {
if (Die.getTag() != dwarf::DW_TAG_compile_unit)
UnitRangeAttribute = Attr;
-void CompileUnit::noteLocationAttribute(DIEInteger *Attr, int64_t PcOffset) {
+void CompileUnit::noteLocationAttribute(PatchLocation Attr, int64_t PcOffset) {
LocationAttributes.emplace_back(Attr, PcOffset);
@@ -387,6 +419,7 @@ class DwarfStreamer {
uint32_t RangesSectionSize;
uint32_t LocSectionSize;
uint32_t LineSectionSize;
+ uint32_t FrameSectionSize;
/// \brief Emit the pubnames or pubtypes section contribution for \p
/// Unit into \p Sec. The data is provided in \p Names.
@@ -460,6 +493,15 @@ public:
/// \brief Emit the .debug_pubtypes contribution for \p Unit.
void emitPubTypesForUnit(const CompileUnit &Unit);
+ /// \brief Emit a CIE.
+ void emitCIE(StringRef CIEBytes);
+ /// \brief Emit an FDE with data \p Bytes.
+ void emitFDE(uint32_t CIEOffset, uint32_t AddreSize, uint32_t Address,
+ StringRef Bytes);
+ uint32_t getFrameSectionSize() const { return FrameSectionSize; }
bool DwarfStreamer::init(Triple TheTriple, StringRef OutputFilename) {
@@ -529,6 +571,7 @@ bool DwarfStreamer::init(Triple TheTriple, StringRef OutputFilename) {
RangesSectionSize = 0;
LocSectionSize = 0;
LineSectionSize = 0;
+ FrameSectionSize = 0;
return true;
@@ -717,8 +760,7 @@ void DwarfStreamer::emitUnitRangesEntries(CompileUnit &Unit,
/// point to the new entries.
void DwarfStreamer::emitLocationsForUnit(const CompileUnit &Unit,
DWARFContext &Dwarf) {
- const std::vector<std::pair<DIEInteger *, int64_t>> &Attributes =
- Unit.getLocationAttributes();
+ const auto &Attributes = Unit.getLocationAttributes();
if (Attributes.empty())
@@ -737,8 +779,8 @@ void DwarfStreamer::emitLocationsForUnit(const CompileUnit &Unit,
UnitPcOffset = int64_t(OrigLowPc) - Unit.getLowPc();
for (const auto &Attr : Attributes) {
- uint32_t Offset = Attr.first->getValue();
- Attr.first->setValue(LocSectionSize);
+ uint32_t Offset = Attr.first.get();
+ Attr.first.set(LocSectionSize);
// This is the quantity to add to the old location address to get
// the correct address for the new one.
int64_t LocPcOffset = Attr.second + UnitPcOffset;
@@ -934,7 +976,7 @@ void DwarfStreamer::emitPubSectionForUnit(
Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); // Length
Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); // Version
- Asm->EmitInt32(Unit.getStartOffset()); // Unit offset
+ Asm->EmitInt32(Unit.getStartOffset()); // Unit offset
Asm->EmitInt32(Unit.getNextUnitOffset() - Unit.getStartOffset()); // Size
HeaderEmitted = true;
@@ -961,6 +1003,28 @@ void DwarfStreamer::emitPubTypesForUnit(const CompileUnit &Unit) {
"types", Unit, Unit.getPubtypes());
+/// \brief Emit a CIE into the debug_frame section.
+void DwarfStreamer::emitCIE(StringRef CIEBytes) {
+ MS->SwitchSection(MC->getObjectFileInfo()->getDwarfFrameSection());
+ MS->EmitBytes(CIEBytes);
+ FrameSectionSize += CIEBytes.size();
+/// \brief Emit an FDE into the debug_frame section. \p FDEBytes
+/// contains the FDE data without the length, CIE offset and address
+/// which will be replaced with the parameter values.
+void DwarfStreamer::emitFDE(uint32_t CIEOffset, uint32_t AddrSize,
+ uint32_t Address, StringRef FDEBytes) {
+ MS->SwitchSection(MC->getObjectFileInfo()->getDwarfFrameSection());
+ MS->EmitIntValue(FDEBytes.size() + 4 + AddrSize, 4);
+ MS->EmitIntValue(CIEOffset, 4);
+ MS->EmitIntValue(Address, AddrSize);
+ MS->EmitBytes(FDEBytes);
+ FrameSectionSize += FDEBytes.size() + 8 + AddrSize;
/// \brief The core of the Dwarf linking logic.
/// The link of the dwarf information from the object files will be
@@ -979,7 +1043,7 @@ class DwarfLinker {
DwarfLinker(StringRef OutputFilename, const LinkOptions &Options)
: OutputFilename(OutputFilename), Options(Options),
- BinHolder(Options.Verbose) {}
+ BinHolder(Options.Verbose), LastCIEOffset(0) {}
~DwarfLinker() {
for (auto *Abbrev : Abbreviations)
@@ -1177,6 +1241,10 @@ private:
/// \brief Emit the accelerator entries for \p Unit.
void emitAcceleratorEntriesForUnit(CompileUnit &Unit);
+ /// \brief Patch the frame info for an object file and emit it.
+ void patchFrameInfoForObject(const DebugMapObject &, DWARFContext &,
+ unsigned AddressSize);
/// \brief DIELoc objects that need to be destructed (but not freed!).
std::vector<DIELoc *> DIELocs;
/// \brief DIEBlock objects that need to be destructed (but not freed!).
@@ -1226,6 +1294,16 @@ private:
/// See startDebugObject() for a more complete description of its use.
std::map<uint64_t, std::pair<uint64_t, int64_t>> Ranges;
+ /// \brief The CIEs that have been emitted in the output
+ /// section. The actual CIE data serves as the key to this StringMap,
+ /// this takes care of comparing the semantics of CIEs defined in
+ /// different object files.
+ StringMap<uint32_t> EmittedCIEs;
+ /// Offset of the last CIE that has been emitted in the output
+ /// debug_frame section.
+ uint32_t LastCIEOffset;
/// \brief Similar to DWARFUnitSection::getUnitForOffset(), but
@@ -1477,15 +1555,15 @@ bool DwarfLinker::hasValidRelocation(uint32_t StartOffset, uint32_t EndOffset,
return false;
const auto &ValidReloc = ValidRelocs[NextValidReloc++];
+ const auto &Mapping = ValidReloc.Mapping->getValue();
if (Options.Verbose)
outs() << "Found valid debug map entry: " << ValidReloc.Mapping->getKey()
<< " " << format("\t%016" PRIx64 " => %016" PRIx64,
- ValidReloc.Mapping->getValue().ObjectAddress,
- ValidReloc.Mapping->getValue().BinaryAddress);
+ uint64_t(Mapping.ObjectAddress),
+ uint64_t(Mapping.BinaryAddress));
- Info.AddrAdjust = int64_t(ValidReloc.Mapping->getValue().BinaryAddress) +
- ValidReloc.Addend -
- ValidReloc.Mapping->getValue().ObjectAddress;
+ Info.AddrAdjust = int64_t(Mapping.BinaryAddress) + ValidReloc.Addend -
+ Mapping.ObjectAddress;
Info.InDebugMap = true;
return true;
@@ -1760,7 +1838,7 @@ unsigned DwarfLinker::cloneStringAttribute(DIE &Die, AttributeSpec AttrSpec,
const char *String = *Val.getAsCString(&U);
unsigned Offset = StringPool.getStringOffset(String);
Die.addValue(dwarf::Attribute(AttrSpec.Attr), dwarf::DW_FORM_strp,
- new (DIEAlloc) DIEInteger(Offset));
+ DIEInteger(Offset));
return 4;
@@ -1803,24 +1881,24 @@ unsigned DwarfLinker::cloneDieReferenceAttribute(
// to find the unit offset. (We don't have a DwarfDebug)
// FIXME: we should be able to design DIEEntry reliance on
// DwarfDebug away.
- DIEInteger *Attr;
+ uint64_t Attr;
if (Ref < InputDIE.getOffset()) {
// We must have already cloned that DIE.
uint32_t NewRefOffset =
RefUnit->getStartOffset() + NewRefDie->getOffset();
- Attr = new (DIEAlloc) DIEInteger(NewRefOffset);
+ Attr = NewRefOffset;
} else {
// A forward reference. Note and fixup later.
- Attr = new (DIEAlloc) DIEInteger(0xBADDEF);
- Unit.noteForwardReference(NewRefDie, RefUnit, Attr);
+ Attr = 0xBADDEF;
+ Unit.noteForwardReference(NewRefDie, RefUnit, PatchLocation(Die));
Die.addValue(dwarf::Attribute(AttrSpec.Attr), dwarf::DW_FORM_ref_addr,
- Attr);
+ DIEInteger(Attr));
return AttrSize;
Die.addValue(dwarf::Attribute(AttrSpec.Attr), dwarf::Form(AttrSpec.Form),
- new (DIEAlloc) DIEEntry(*NewRefDie));
+ DIEEntry(*NewRefDie));
return AttrSize;
@@ -1831,23 +1909,29 @@ unsigned DwarfLinker::cloneBlockAttribute(DIE &Die, AttributeSpec AttrSpec,
const DWARFFormValue &Val,
unsigned AttrSize) {
DIE *Attr;
- DIEValue *Value;
+ DIEValue Value;
DIELoc *Loc = nullptr;
DIEBlock *Block = nullptr;
// Just copy the block data over.
if (AttrSpec.Form == dwarf::DW_FORM_exprloc) {
- Loc = new (DIEAlloc) DIELoc();
+ Loc = new (DIEAlloc) DIELoc;
} else {
- Block = new (DIEAlloc) DIEBlock();
+ Block = new (DIEAlloc) DIEBlock;
Attr = Loc ? static_cast<DIE *>(Loc) : static_cast<DIE *>(Block);
- Value = Loc ? static_cast<DIEValue *>(Loc) : static_cast<DIEValue *>(Block);
+ if (Loc)
+ Value = DIEValue(dwarf::Attribute(AttrSpec.Attr),
+ dwarf::Form(AttrSpec.Form), Loc);
+ else
+ Value = DIEValue(dwarf::Attribute(AttrSpec.Attr),
+ dwarf::Form(AttrSpec.Form), Block);
ArrayRef<uint8_t> Bytes = *Val.getAsBlock();
for (auto Byte : Bytes)
Attr->addValue(static_cast<dwarf::Attribute>(0), dwarf::DW_FORM_data1,
- new (DIEAlloc) DIEInteger(Byte));
+ DIEInteger(Byte));
// FIXME: If DIEBlock and DIELoc just reuses the Size field of
// the DIE class, this if could be replaced by
// Attr->setSize(Bytes.size()).
@@ -1857,8 +1941,7 @@ unsigned DwarfLinker::cloneBlockAttribute(DIE &Die, AttributeSpec AttrSpec,
- Die.addValue(dwarf::Attribute(AttrSpec.Attr), dwarf::Form(AttrSpec.Form),
- Value);
+ Die.addValue(Value);
return AttrSize;
@@ -1893,8 +1976,7 @@ unsigned DwarfLinker::cloneAddressAttribute(DIE &Die, AttributeSpec AttrSpec,
- static_cast<dwarf::Form>(AttrSpec.Form),
- new (DIEAlloc) DIEInteger(Addr));
+ static_cast<dwarf::Form>(AttrSpec.Form), DIEInteger(Addr));
return Unit.getOrigUnit().getAddressByteSize();
@@ -1922,15 +2004,15 @@ unsigned DwarfLinker::cloneScalarAttribute(
&Unit.getOrigUnit(), &InputDIE);
return 0;
- DIEInteger *Attr = new (DIEAlloc) DIEInteger(Value);
+ DIEInteger Attr(Value);
if (AttrSpec.Attr == dwarf::DW_AT_ranges)
- Unit.noteRangeAttribute(Die, Attr);
+ Unit.noteRangeAttribute(Die, PatchLocation(Die));
// A more generic way to check for location attributes would be
// nice, but it's very unlikely that any other attribute needs a
// location list.
else if (AttrSpec.Attr == dwarf::DW_AT_location ||
AttrSpec.Attr == dwarf::DW_AT_frame_base)
- Unit.noteLocationAttribute(Attr, Info.PCOffset);
+ Unit.noteLocationAttribute(PatchLocation(Die), Info.PCOffset);
else if (AttrSpec.Attr == dwarf::DW_AT_declaration && Value)
Info.IsDeclaration = true;
@@ -2157,14 +2239,15 @@ DIE *DwarfLinker::cloneDIE(const DWARFDebugInfoEntryMinimal &InputDIE,
Unit.addTypeAccelerator(Die, AttrInfo.Name, AttrInfo.NameOffset);
- DIEAbbrev &NewAbbrev = Die->getAbbrev();
+ DIEAbbrev NewAbbrev = Die->generateAbbrev();
// If a scope DIE is kept, we must have kept at least one child. If
// it's not the case, we'll just be emitting one wasteful end of
// children marker, but things won't break.
if (InputDIE.hasChildren())
// Assign a permanent abbrev number
- AssignAbbrev(Die->getAbbrev());
+ AssignAbbrev(NewAbbrev);
+ Die->setAbbrevNumber(NewAbbrev.getNumber());
// Add the size of the abbreviation number to the output offset.
OutOffset += getULEB128Size(Die->getAbbrevNumber());
@@ -2213,8 +2296,8 @@ void DwarfLinker::patchRangesForUnit(const CompileUnit &Unit,
UnitPcOffset = int64_t(OrigLowPc) - Unit.getLowPc();
for (const auto &RangeAttribute : Unit.getRangesAttributes()) {
- uint32_t Offset = RangeAttribute->getValue();
- RangeAttribute->setValue(Streamer->getRangesSectionSize());
+ uint32_t Offset = RangeAttribute.get();
+ RangeAttribute.set(Streamer->getRangesSectionSize());
RangeList.extract(RangeExtractor, &Offset);
const auto &Entries = RangeList.getEntries();
const DWARFDebugRangeList::RangeListEntry &First = Entries.front();
@@ -2241,10 +2324,10 @@ void DwarfLinker::patchRangesForUnit(const CompileUnit &Unit,
/// but for the sake of initial bit-for-bit compatibility with legacy
/// dsymutil, we have to do it in a delayed pass.
void DwarfLinker::generateUnitRanges(CompileUnit &Unit) const {
- DIEInteger *Attr = Unit.getUnitRangesAttribute();
+ auto Attr = Unit.getUnitRangesAttribute();
if (Attr)
- Attr->setValue(Streamer->getRangesSectionSize());
- Streamer->emitUnitRangesEntries(Unit, Attr != nullptr);
+ Attr->set(Streamer->getRangesSectionSize());
+ Streamer->emitUnitRangesEntries(Unit, static_cast<bool>(Attr));
/// \brief Insert the new line info sequence \p Seq into the current
@@ -2286,8 +2369,7 @@ static void insertLineSequence(std::vector<DWARFDebugLine::Row> &Seq,
/// are present in the binary.
void DwarfLinker::patchLineTableForUnit(CompileUnit &Unit,
DWARFContext &OrigDwarf) {
- const DWARFDebugInfoEntryMinimal *CUDie =
- Unit.getOrigUnit().getUnitDIE();
+ const DWARFDebugInfoEntryMinimal *CUDie = Unit.getOrigUnit().getUnitDIE();
uint64_t StmtList = CUDie->getAttributeValueAsSectionOffset(
&Unit.getOrigUnit(), dwarf::DW_AT_stmt_list, -1ULL);
if (StmtList == -1ULL)
@@ -2295,15 +2377,16 @@ void DwarfLinker::patchLineTableForUnit(CompileUnit &Unit,
// Update the cloned DW_AT_stmt_list with the correct debug_line offset.
if (auto *OutputDIE = Unit.getOutputUnitDIE()) {
- const auto &Abbrev = OutputDIE->getAbbrev().getData();
- auto Stmt = std::find_if(
- Abbrev.begin(), Abbrev.end(), [](const DIEAbbrevData &AbbrevData) {
- return AbbrevData.getAttribute() == dwarf::DW_AT_stmt_list;
- });
- assert(Stmt < Abbrev.end() && "Didn't find DW_AT_stmt_list in cloned DIE!");
- DIEInteger *StmtAttr =
- cast<DIEInteger>(OutputDIE->getValues()[Stmt - Abbrev.begin()]);
- StmtAttr->setValue(Streamer->getLineSectionSize());
+ auto Stmt =
+ std::find_if(OutputDIE->values_begin(), OutputDIE->values_end(),
+ [](const DIEValue &Value) {
+ return Value.getAttribute() == dwarf::DW_AT_stmt_list;
+ });
+ assert(Stmt != OutputDIE->values_end() &&
+ "Didn't find DW_AT_stmt_list in cloned DIE!");
+ OutputDIE->setValue(Stmt - OutputDIE->values_begin(),
+ DIEValue(Stmt->getAttribute(), Stmt->getForm(),
+ DIEInteger(Streamer->getLineSectionSize())));
// Parse the original line info for the unit.
@@ -2422,6 +2505,91 @@ void DwarfLinker::emitAcceleratorEntriesForUnit(CompileUnit &Unit) {
+/// \brief Read the frame info stored in the object, and emit the
+/// patched frame descriptions for the linked binary.
+///
+/// This is actually pretty easy as the data of the CIEs and FDEs can
+/// be considered as black boxes and moved as is. The only thing to do
+/// is to patch the addresses in the headers.
+///
+/// \param DMO the debug map object being processed (not read by this code).
+/// \param OrigDwarf context providing the object's debug_frame section.
+/// \param AddrSize size in bytes of the initial_location field of an FDE.
+void DwarfLinker::patchFrameInfoForObject(const DebugMapObject &DMO,
+                                          DWARFContext &OrigDwarf,
+                                          unsigned AddrSize) {
+  StringRef FrameData = OrigDwarf.getDebugFrameSection();
+  if (FrameData.empty())
+    return;
+  DataExtractor Data(FrameData, OrigDwarf.isLittleEndian(), 0);
+  uint32_t InputOffset = 0;
+  // Store the data of the CIEs defined in this object, keyed by their
+  // offsets.
+  DenseMap<uint32_t, StringRef> LocalCIES;
+  while (Data.isValidOffset(InputOffset)) {
+    uint32_t EntryOffset = InputOffset;
+    uint32_t InitialLength = Data.getU32(&InputOffset);
+    if (InitialLength == 0xFFFFFFFF)
+      return reportWarning("Dwarf64 bits not supported");
+    uint32_t CIEId = Data.getU32(&InputOffset);
+    if (CIEId == 0xFFFFFFFF) {
+      // This is a CIE, store it.
+      StringRef CIEData = FrameData.substr(EntryOffset, InitialLength + 4);
+      LocalCIES[EntryOffset] = CIEData;
+      // The -4 is to account for the CIEId we just read.
+      InputOffset += InitialLength - 4;
+      continue;
+    }
+    // An FDE has to hold at least the CIE pointer and the
+    // initial_location field. A shorter length would make the
+    // remaining-bytes computation below wrap around.
+    if (InitialLength < 4 + AddrSize)
+      return reportWarning("Inconsistent debug_frame content. Dropping.");
+    // Keep the full width of the address: AddrSize can be 8, and a
+    // 32-bit variable would truncate it before the range lookup.
+    uint64_t Loc = Data.getUnsigned(&InputOffset, AddrSize);
+    // Some compilers seem to emit frame info that doesn't start at
+    // the function entry point, thus we can't just lookup the address
+    // in the debug map. Use the linker's range map to see if the FDE
+    // describes something that we can relocate.
+    auto Range = Ranges.upper_bound(Loc);
+    if (Range != Ranges.begin())
+      --Range;
+    if (Range == Ranges.end() || Range->first > Loc ||
+        Range->second.first <= Loc) {
+      // The +4 is to account for the size of the InitialLength field itself.
+      InputOffset = EntryOffset + InitialLength + 4;
+      continue;
+    }
+    // This is an FDE, and we have a mapping.
+    // Have we already emitted a corresponding CIE?
+    StringRef CIEData = LocalCIES[CIEId];
+    if (CIEData.empty())
+      return reportWarning("Inconsistent debug_frame content. Dropping.");
+    // Look if we already emitted a CIE that corresponds to the
+    // referenced one (the CIE data is the key of that lookup).
+    auto IteratorInserted = EmittedCIEs.insert(
+        std::make_pair(CIEData, Streamer->getFrameSectionSize()));
+    // If there is no CIE yet for this ID, emit it.
+    if (IteratorInserted.second ||
+        // FIXME: dsymutil-classic only caches the last used CIE for
+        // reuse. Mimic that behavior for now. Just removing that
+        // second half of the condition and the LastCIEOffset variable
+        // makes the code DTRT.
+        LastCIEOffset != IteratorInserted.first->getValue()) {
+      LastCIEOffset = Streamer->getFrameSectionSize();
+      IteratorInserted.first->getValue() = LastCIEOffset;
+      Streamer->emitCIE(CIEData);
+    }
+    // Emit the FDE with updated address and CIE pointer.
+    // (4 + AddrSize) is the size of the CIEId + initial_location
+    // fields that will get reconstructed by emitFDE().
+    unsigned FDERemainingBytes = InitialLength - (4 + AddrSize);
+    Streamer->emitFDE(IteratorInserted.first->getValue(), AddrSize,
+                      Loc + Range->second.second,
+                      FrameData.substr(InputOffset, FDERemainingBytes));
+    InputOffset += FDERemainingBytes;
+  }
bool DwarfLinker::link(const DebugMap &Map) {
if (Map.begin() == Map.end()) {
@@ -2519,6 +2687,10 @@ bool DwarfLinker::link(const DebugMap &Map) {
+ if (!ValidRelocs.empty() && !Options.NoOutput && !Units.empty())
+ patchFrameInfoForObject(*Obj, DwarfContext,
+ Units[0].getOrigUnit().getAddressByteSize());
// Clean-up before starting working on the next object.
diff --git a/tools/dsymutil/MachODebugMapParser.cpp b/tools/dsymutil/MachODebugMapParser.cpp
index bf64303..b803e41 100644
--- a/tools/dsymutil/MachODebugMapParser.cpp
+++ b/tools/dsymutil/MachODebugMapParser.cpp
@@ -244,10 +244,16 @@ void MachODebugMapParser::loadMainBinarySymbols() {
namespace llvm {
namespace dsymutil {
-parseDebugMap(StringRef InputFile, StringRef PrependPath, bool Verbose) {
- MachODebugMapParser Parser(InputFile, PrependPath, Verbose);
- return Parser.parse();
+llvm::ErrorOr<std::unique_ptr<DebugMap>> parseDebugMap(StringRef InputFile,
+                                                       StringRef PrependPath,
+                                                       bool Verbose,
+                                                       bool InputIsYAML) {
+  // A YAML input is handed straight to DebugMap's own YAML reader.
+  if (InputIsYAML)
+    return DebugMap::parseYAMLDebugMap(InputFile, PrependPath, Verbose);
+  // Any other input goes through MachODebugMapParser.
+  MachODebugMapParser Parser(InputFile, PrependPath, Verbose);
+  return Parser.parse();
diff --git a/tools/dsymutil/dsymutil.cpp b/tools/dsymutil/dsymutil.cpp
index 4fc91b0..5009193 100644
--- a/tools/dsymutil/dsymutil.cpp
+++ b/tools/dsymutil/dsymutil.cpp
@@ -30,26 +30,32 @@ using namespace llvm::cl;
static opt<std::string> InputFile(Positional, desc("<input file>"),
-static opt<std::string> OutputFileOpt("o", desc("Specify the output file."
- " default: <input file>.dwarf"),
- value_desc("filename"));
+static opt<std::string>
+ OutputFileOpt("o",
+ desc("Specify the output file. default: <input file>.dwarf"),
+ value_desc("filename"));
-static opt<std::string> OsoPrependPath("oso-prepend-path",
- desc("Specify a directory to prepend "
- "to the paths of object files."),
- value_desc("path"));
+static opt<std::string> OsoPrependPath(
+ "oso-prepend-path",
+ desc("Specify a directory to prepend to the paths of object files."),
+ value_desc("path"));
static opt<bool> Verbose("v", desc("Verbosity level"), init(false));
-static opt<bool> NoOutput("no-output", desc("Do the link in memory, but do "
- "not emit the result file."),
- init(false));
static opt<bool>
- ParseOnly("parse-only",
- desc("Only parse the debug map, do not actaully link "
- "the DWARF."),
- init(false));
+ NoOutput("no-output",
+ desc("Do the link in memory, but do not emit the result file."),
+ init(false));
+// -dump-debug-map: parse and print the debug map; main() returns right
+// after the dump when this flag is set.
+static opt<bool> DumpDebugMap(
+    "dump-debug-map",
+    desc("Parse and dump the debug map to standard output. No DWARF link "
+         "will take place."),
+    init(false));
+// -y: the positional input is a YAML debug map; forwarded to
+// parseDebugMap() as its InputIsYAML argument.
+static opt<bool> InputIsYAMLDebugMap(
+    "y", desc("Treat the input file as a YAML debug map rather than a binary."),
+    init(false));
int main(int argc, char **argv) {
@@ -59,7 +65,9 @@ int main(int argc, char **argv) {
LinkOptions Options;
llvm::cl::ParseCommandLineOptions(argc, argv, "llvm dsymutil\n");
- auto DebugMapPtrOrErr = parseDebugMap(InputFile, OsoPrependPath, Verbose);
+ auto DebugMapPtrOrErr =
+ parseDebugMap(InputFile, OsoPrependPath, Verbose, InputIsYAMLDebugMap);
Options.Verbose = Verbose;
Options.NoOutput = NoOutput;
@@ -75,10 +83,10 @@ int main(int argc, char **argv) {
return 1;
- if (Verbose)
+ if (Verbose || DumpDebugMap)
- if (ParseOnly)
+ if (DumpDebugMap)
return 0;
std::string OutputFile;
diff --git a/tools/dsymutil/dsymutil.h b/tools/dsymutil/dsymutil.h
index e9f7cd9..4089187 100644
--- a/tools/dsymutil/dsymutil.h
+++ b/tools/dsymutil/dsymutil.h
@@ -33,9 +33,10 @@ struct LinkOptions {
/// \brief Extract the DebugMap from the given file.
/// The file has to be a MachO object file.
-parseDebugMap(StringRef InputFile, StringRef PrependPath = "",
- bool Verbose = false);
+llvm::ErrorOr<std::unique_ptr<DebugMap>> parseDebugMap(StringRef InputFile,
+ StringRef PrependPath,
+ bool Verbose,
+ bool InputIsYAML);
/// \brief Link the Dwarf debuginfo as directed by the passed DebugMap
/// \p DM into a DwarfFile named \p OutputFilename.
diff --git a/tools/llc/CMakeLists.txt b/tools/llc/CMakeLists.txt
index dcbcf9d..ff5a89e 100644
--- a/tools/llc/CMakeLists.txt
+++ b/tools/llc/CMakeLists.txt
@@ -6,9 +6,11 @@ set(LLVM_LINK_COMPONENTS
+ MIRParser
+ Target
# Support plugins.
diff --git a/tools/llc/LLVMBuild.txt b/tools/llc/LLVMBuild.txt
index 45cdc64..38660cf 100644
--- a/tools/llc/LLVMBuild.txt
+++ b/tools/llc/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Tool
name = llc
parent = Tools
-required_libraries = AsmParser BitReader IRReader all-targets
+required_libraries = AsmParser BitReader IRReader MIRParser all-targets
diff --git a/tools/llc/Makefile b/tools/llc/Makefile
index 71bce4d..ae64c9a 100644
--- a/tools/llc/Makefile
+++ b/tools/llc/Makefile
@@ -9,7 +9,7 @@
LEVEL := ../..
-LINK_COMPONENTS := all-targets bitreader asmparser irreader
+LINK_COMPONENTS := all-targets bitreader asmparser irreader mirparser
# Support plugins.
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 0977418..fadcfa9 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
+#include "llvm/CodeGen/MIRParser/MIRParser.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LLVMContext.h"
@@ -109,6 +110,8 @@ GetOutputStream(const char *TargetName, Triple::OSType OS,
StringRef IFN = InputFilename;
if (IFN.endswith(".bc") || IFN.endswith(".ll"))
OutputFilename = IFN.drop_back(3);
+ else if (IFN.endswith(".mir"))
+ OutputFilename = IFN.drop_back(4);
OutputFilename = IFN;
@@ -214,7 +217,10 @@ static int compileModule(char **argv, LLVMContext &Context) {
// If user just wants to list available options, skip module loading
if (!SkipModule) {
- M = parseIRFile(InputFilename, Err, Context);
+ if (StringRef(InputFilename).endswith_lower(".mir"))
+ M = parseMIRFile(InputFilename, Err, Context);
+ else
+ M = parseIRFile(InputFilename, Err, Context);
if (!M) {
Err.print(argv[0], errs());
return 1;
diff --git a/tools/lli/CMakeLists.txt b/tools/lli/CMakeLists.txt
index 844b994..4af0596 100644
--- a/tools/lli/CMakeLists.txt
+++ b/tools/lli/CMakeLists.txt
@@ -14,6 +14,7 @@ set(LLVM_LINK_COMPONENTS
+ Target
diff --git a/tools/lli/OrcLazyJIT.cpp b/tools/lli/OrcLazyJIT.cpp
index bda5d6d..afccfa6 100644
--- a/tools/lli/OrcLazyJIT.cpp
+++ b/tools/lli/OrcLazyJIT.cpp
@@ -108,6 +108,9 @@ OrcLazyJIT::TransformFtor OrcLazyJIT::createDebugDumper() {
llvm_unreachable("Unknown DumpKind");
+// Defined in lli.cpp.
+CodeGenOpt::Level getOptLevel();
int llvm::runOrcLazyJIT(std::unique_ptr<Module> M, int ArgC, char* ArgV[]) {
// Add the program's symbols into the JIT's search space.
if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr)) {
@@ -117,7 +120,9 @@ int llvm::runOrcLazyJIT(std::unique_ptr<Module> M, int ArgC, char* ArgV[]) {
// Grab a target machine and try to build a factory function for the
// target-specific Orc callback manager.
- auto TM = std::unique_ptr<TargetMachine>(EngineBuilder().selectTarget());
+ EngineBuilder EB;
+ EB.setOptLevel(getOptLevel());
+ auto TM = std::unique_ptr<TargetMachine>(EB.selectTarget());
auto &Context = getGlobalContext();
auto CallbackMgrBuilder =
diff --git a/tools/lli/OrcLazyJIT.h b/tools/lli/OrcLazyJIT.h
index bff2eca..c4a12b6 100644
--- a/tools/lli/OrcLazyJIT.h
+++ b/tools/lli/OrcLazyJIT.h
@@ -116,8 +116,7 @@ public:
orc::CtorDtorRunner<CODLayerT> CtorRunner(std::move(CtorNames), H);
- IRStaticDestructorRunners.push_back(
- orc::CtorDtorRunner<CODLayerT>(std::move(DtorNames), H));
+ IRStaticDestructorRunners.emplace_back(std::move(DtorNames), H);
return H;
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 6916d16..057841f 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -365,6 +365,19 @@ static void addCygMingExtraModule(ExecutionEngine *EE,
+/// Translate the OptLevel command line character into a CodeGenOpt::Level.
+/// ' ' and '2' both select CodeGenOpt::Default. Any other unlisted
+/// character prints an error to errs() and exits with status 1.
+CodeGenOpt::Level getOptLevel() {
+  switch (OptLevel) {
+  case '0':
+    return CodeGenOpt::None;
+  case '1':
+    return CodeGenOpt::Less;
+  case ' ':
+  case '2':
+    return CodeGenOpt::Default;
+  case '3':
+    return CodeGenOpt::Aggressive;
+  default:
+    errs() << "lli: Invalid optimization level.\n";
+    exit(1);
+  }
+  llvm_unreachable("Unrecognized opt level.");
// main Driver function
@@ -451,18 +464,7 @@ int main(int argc, char **argv, char * const *envp) {
- CodeGenOpt::Level OLvl = CodeGenOpt::Default;
- switch (OptLevel) {
- default:
- errs() << argv[0] << ": invalid optimization level.\n";
- return 1;
- case ' ': break;
- case '0': OLvl = CodeGenOpt::None; break;
- case '1': OLvl = CodeGenOpt::Less; break;
- case '2': OLvl = CodeGenOpt::Default; break;
- case '3': OLvl = CodeGenOpt::Aggressive; break;
- }
- builder.setOptLevel(OLvl);
+ builder.setOptLevel(getOptLevel());
TargetOptions Options;
if (FloatABIForCalls != FloatABI::Default)
diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp
index f813465..1f55e8a 100644
--- a/tools/llvm-ar/llvm-ar.cpp
+++ b/tools/llvm-ar/llvm-ar.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/Archive.h"
+#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
@@ -45,16 +46,9 @@ using namespace llvm;
// The name this program was invoked as.
static StringRef ToolName;
-static const char *TemporaryOutput;
-static int TmpArchiveFD = -1;
// Show the error message and exit.
LLVM_ATTRIBUTE_NORETURN static void fail(Twine Error) {
outs() << ToolName << ": " << Error << ".\n";
- if (TmpArchiveFD != -1)
- close(TmpArchiveFD);
- if (TemporaryOutput)
- sys::fs::remove(TemporaryOutput);
@@ -405,70 +399,6 @@ static void performReadOperation(ArchiveOperation Operation,
-namespace {
-class NewArchiveIterator {
- bool IsNewMember;
- StringRef Name;
- object::Archive::child_iterator OldI;
- StringRef NewFilename;
- NewArchiveIterator(object::Archive::child_iterator I, StringRef Name);
- NewArchiveIterator(StringRef I, StringRef Name);
- NewArchiveIterator();
- bool isNewMember() const;
- StringRef getName() const;
- object::Archive::child_iterator getOld() const;
- StringRef getNew() const;
- int getFD(sys::fs::file_status &NewStatus) const;
- const sys::fs::file_status &getStatus() const;
-NewArchiveIterator::NewArchiveIterator() {}
-NewArchiveIterator::NewArchiveIterator(object::Archive::child_iterator I,
- StringRef Name)
- : IsNewMember(false), Name(Name), OldI(I) {}
-NewArchiveIterator::NewArchiveIterator(StringRef NewFilename, StringRef Name)
- : IsNewMember(true), Name(Name), NewFilename(NewFilename) {}
-StringRef NewArchiveIterator::getName() const { return Name; }
-bool NewArchiveIterator::isNewMember() const { return IsNewMember; }
-object::Archive::child_iterator NewArchiveIterator::getOld() const {
- assert(!IsNewMember);
- return OldI;
-StringRef NewArchiveIterator::getNew() const {
- assert(IsNewMember);
- return NewFilename;
-int NewArchiveIterator::getFD(sys::fs::file_status &NewStatus) const {
- assert(IsNewMember);
- int NewFD;
- failIfError(sys::fs::openFileForRead(NewFilename, NewFD), NewFilename);
- assert(NewFD != -1);
- failIfError(sys::fs::status(NewFD, NewStatus), NewFilename);
- // Opening a directory doesn't make sense. Let it fail.
- // Linux cannot open directories with open(2), although
- // cygwin and *bsd can.
- if (NewStatus.type() == sys::fs::file_type::directory_file)
- failIfError(make_error_code(errc::is_a_directory), NewFilename);
- return NewFD;
template <typename T>
void addMember(std::vector<NewArchiveIterator> &Members, T I, StringRef Name,
int Pos = -1) {
@@ -605,269 +535,19 @@ computeNewArchiveMembers(ArchiveOperation Operation,
return Ret;
-template <typename T>
-static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size,
- bool MayTruncate = false) {
- uint64_t OldPos = OS.tell();
- OS << Data;
- unsigned SizeSoFar = OS.tell() - OldPos;
- if (Size > SizeSoFar) {
- unsigned Remaining = Size - SizeSoFar;
- for (unsigned I = 0; I < Remaining; ++I)
- OS << ' ';
- } else if (Size < SizeSoFar) {
- assert(MayTruncate && "Data doesn't fit in Size");
- // Some of the data this is used for (like UID) can be larger than the
- // space available in the archive format. Truncate in that case.
- + Size);
- }
-static void print32BE(raw_fd_ostream &Out, unsigned Val) {
- for (int I = 3; I >= 0; --I) {
- char V = (Val >> (8 * I)) & 0xff;
- Out << V;
- }
-static void printRestOfMemberHeader(raw_fd_ostream &Out,
- const sys::TimeValue &ModTime, unsigned UID,
- unsigned GID, unsigned Perms,
- unsigned Size) {
- printWithSpacePadding(Out, ModTime.toEpochTime(), 12);
- printWithSpacePadding(Out, UID, 6, true);
- printWithSpacePadding(Out, GID, 6, true);
- printWithSpacePadding(Out, format("%o", Perms), 8);
- printWithSpacePadding(Out, Size, 10);
- Out << "`\n";
-static void printMemberHeader(raw_fd_ostream &Out, StringRef Name,
- const sys::TimeValue &ModTime, unsigned UID,
- unsigned GID, unsigned Perms, unsigned Size) {
- printWithSpacePadding(Out, Twine(Name) + "/", 16);
- printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size);
-static void printMemberHeader(raw_fd_ostream &Out, unsigned NameOffset,
- const sys::TimeValue &ModTime, unsigned UID,
- unsigned GID, unsigned Perms, unsigned Size) {
- Out << '/';
- printWithSpacePadding(Out, NameOffset, 15);
- printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size);
-static void writeStringTable(raw_fd_ostream &Out,
- ArrayRef<NewArchiveIterator> Members,
- std::vector<unsigned> &StringMapIndexes) {
- unsigned StartOffset = 0;
- for (ArrayRef<NewArchiveIterator>::iterator I = Members.begin(),
- E = Members.end();
- I != E; ++I) {
- StringRef Name = I->getName();
- if (Name.size() < 16)
- continue;
- if (StartOffset == 0) {
- printWithSpacePadding(Out, "//", 58);
- Out << "`\n";
- StartOffset = Out.tell();
- }
- StringMapIndexes.push_back(Out.tell() - StartOffset);
- Out << Name << "/\n";
- }
- if (StartOffset == 0)
- return;
- if (Out.tell() % 2)
- Out << '\n';
- int Pos = Out.tell();
- - 12);
- printWithSpacePadding(Out, Pos - StartOffset, 10);
-// Returns the offset of the first reference to a member offset.
-static unsigned writeSymbolTable(raw_fd_ostream &Out,
- ArrayRef<NewArchiveIterator> Members,
- ArrayRef<MemoryBufferRef> Buffers,
- std::vector<unsigned> &MemberOffsetRefs) {
- unsigned StartOffset = 0;
- unsigned MemberNum = 0;
- std::string NameBuf;
- raw_string_ostream NameOS(NameBuf);
- unsigned NumSyms = 0;
- LLVMContext &Context = getGlobalContext();
- for (ArrayRef<NewArchiveIterator>::iterator I = Members.begin(),
- E = Members.end();
- I != E; ++I, ++MemberNum) {
- MemoryBufferRef MemberBuffer = Buffers[MemberNum];
- ErrorOr<std::unique_ptr<object::SymbolicFile>> ObjOrErr =
- object::SymbolicFile::createSymbolicFile(
- MemberBuffer, sys::fs::file_magic::unknown, &Context);
- if (!ObjOrErr)
- continue; // FIXME: check only for "not an object file" errors.
- object::SymbolicFile &Obj = *ObjOrErr.get();
- if (!StartOffset) {
- printMemberHeader(Out, "", sys::TimeValue::now(), 0, 0, 0, 0);
- StartOffset = Out.tell();
- print32BE(Out, 0);
- }
- for (const object::BasicSymbolRef &S : Obj.symbols()) {
- uint32_t Symflags = S.getFlags();
- if (Symflags & object::SymbolRef::SF_FormatSpecific)
- continue;
- if (!(Symflags & object::SymbolRef::SF_Global))
- continue;
- if (Symflags & object::SymbolRef::SF_Undefined)
- continue;
- failIfError(S.printName(NameOS));
- NameOS << '\0';
- ++NumSyms;
- MemberOffsetRefs.push_back(MemberNum);
- print32BE(Out, 0);
- }
- }
- Out << NameOS.str();
- if (StartOffset == 0)
- return 0;
- if (Out.tell() % 2)
- Out << '\0';
- unsigned Pos = Out.tell();
- - 12);
- printWithSpacePadding(Out, Pos - StartOffset, 10);
- print32BE(Out, NumSyms);
- return StartOffset + 4;
-static void
-performWriteOperation(ArchiveOperation Operation, object::Archive *OldArchive,
- std::vector<NewArchiveIterator> &NewMembers) {
- SmallString<128> TmpArchive;
- failIfError(sys::fs::createUniqueFile(ArchiveName + ".temp-archive-%%%%%%%.a",
- TmpArchiveFD, TmpArchive));
- TemporaryOutput = TmpArchive.c_str();
- tool_output_file Output(TemporaryOutput, TmpArchiveFD);
- raw_fd_ostream &Out = Output.os();
- Out << "!<arch>\n";
- std::vector<unsigned> MemberOffsetRefs;
- std::vector<std::unique_ptr<MemoryBuffer>> Buffers;
- std::vector<MemoryBufferRef> Members;
- std::vector<sys::fs::file_status> NewMemberStatus;
- for (unsigned I = 0, N = NewMembers.size(); I < N; ++I) {
- NewArchiveIterator &Member = NewMembers[I];
- MemoryBufferRef MemberRef;
- if (Member.isNewMember()) {
- StringRef Filename = Member.getNew();
- NewMemberStatus.resize(NewMemberStatus.size() + 1);
- sys::fs::file_status &Status = NewMemberStatus.back();
- int FD = Member.getFD(Status);
- ErrorOr<std::unique_ptr<MemoryBuffer>> MemberBufferOrErr =
- MemoryBuffer::getOpenFile(FD, Filename, Status.getSize(), false);
- failIfError(MemberBufferOrErr.getError(), Filename);
- if (close(FD) != 0)
- fail("Could not close file");
- Buffers.push_back(std::move(MemberBufferOrErr.get()));
- MemberRef = Buffers.back()->getMemBufferRef();
- } else {
- object::Archive::child_iterator OldMember = Member.getOld();
- ErrorOr<MemoryBufferRef> MemberBufferOrErr =
- OldMember->getMemoryBufferRef();
- failIfError(MemberBufferOrErr.getError());
- MemberRef = MemberBufferOrErr.get();
- }
- Members.push_back(MemberRef);
- }
- unsigned MemberReferenceOffset = 0;
- if (Symtab) {
- MemberReferenceOffset =
- writeSymbolTable(Out, NewMembers, Members, MemberOffsetRefs);
- }
- std::vector<unsigned> StringMapIndexes;
- writeStringTable(Out, NewMembers, StringMapIndexes);
- unsigned MemberNum = 0;
- unsigned LongNameMemberNum = 0;
- unsigned NewMemberNum = 0;
- std::vector<unsigned> MemberOffset;
- for (std::vector<NewArchiveIterator>::iterator I = NewMembers.begin(),
- E = NewMembers.end();
- I != E; ++I, ++MemberNum) {
- unsigned Pos = Out.tell();
- MemberOffset.push_back(Pos);
- MemoryBufferRef File = Members[MemberNum];
- if (I->isNewMember()) {
- StringRef FileName = I->getNew();
- const sys::fs::file_status &Status = NewMemberStatus[NewMemberNum];
- NewMemberNum++;
- StringRef Name = sys::path::filename(FileName);
- if (Name.size() < 16)
- printMemberHeader(Out, Name, Status.getLastModificationTime(),
- Status.getUser(), Status.getGroup(),
- Status.permissions(), Status.getSize());
- else
- printMemberHeader(Out, StringMapIndexes[LongNameMemberNum++],
- Status.getLastModificationTime(), Status.getUser(),
- Status.getGroup(), Status.permissions(),
- Status.getSize());
- } else {
- object::Archive::child_iterator OldMember = I->getOld();
- StringRef Name = I->getName();
- if (Name.size() < 16)
- printMemberHeader(Out, Name, OldMember->getLastModified(),
- OldMember->getUID(), OldMember->getGID(),
- OldMember->getAccessMode(), OldMember->getSize());
- else
- printMemberHeader(Out, StringMapIndexes[LongNameMemberNum++],
- OldMember->getLastModified(), OldMember->getUID(),
- OldMember->getGID(), OldMember->getAccessMode(),
- OldMember->getSize());
- }
- Out << File.getBuffer();
- if (Out.tell() % 2)
- Out << '\n';
- }
- if (MemberReferenceOffset) {
- for (unsigned MemberNum : MemberOffsetRefs)
- print32BE(Out, MemberOffset[MemberNum]);
- }
- Output.keep();
- Out.close();
- sys::fs::rename(TemporaryOutput, ArchiveName);
- TemporaryOutput = nullptr;
static void
performWriteOperation(ArchiveOperation Operation, object::Archive *OldArchive,
std::vector<NewArchiveIterator> *NewMembersP) {
if (NewMembersP) {
- performWriteOperation(Operation, OldArchive, *NewMembersP);
+ std::pair<StringRef, std::error_code> Result =
+ writeArchive(ArchiveName, *NewMembersP, Symtab);
+ failIfError(Result.second, Result.first);
std::vector<NewArchiveIterator> NewMembers =
computeNewArchiveMembers(Operation, OldArchive);
- performWriteOperation(Operation, OldArchive, NewMembers);
+ auto Result = writeArchive(ArchiveName, NewMembers, Symtab);
+ failIfError(Result.second, Result.first);
static void createSymbolTable(object::Archive *OldArchive) {
diff --git a/tools/llvm-cov/CodeCoverage.cpp b/tools/llvm-cov/CodeCoverage.cpp
index f85f3b1..4ff5330 100644
--- a/tools/llvm-cov/CodeCoverage.cpp
+++ b/tools/llvm-cov/CodeCoverage.cpp
@@ -116,8 +116,7 @@ CodeCoverageTool::getSourceFile(StringRef SourceFile) {
error(EC.message(), SourceFile);
return EC;
- LoadedSourceFiles.push_back(
- std::make_pair(SourceFile, std::move(Buffer.get())));
+ LoadedSourceFiles.emplace_back(SourceFile, std::move(Buffer.get()));
return *LoadedSourceFiles.back().second;
diff --git a/tools/llvm-cov/llvm-cov.cpp b/tools/llvm-cov/llvm-cov.cpp
index bf66f58..8c5acae 100644
--- a/tools/llvm-cov/llvm-cov.cpp
+++ b/tools/llvm-cov/llvm-cov.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
@@ -43,6 +44,12 @@ static int helpMain(int argc, const char *argv[]) {
return 0;
+/// \brief Top level version information: print the version message, return 0.
+static int versionMain(int argc, const char *argv[]) {
+ cl::PrintVersionMessage();
+ return 0;
int main(int argc, const char **argv) {
// If argv[0] is or ends with 'gcov', always be gcov compatible
if (sys::path::stem(argv[0]).endswith_lower("gcov"))
@@ -57,6 +64,7 @@ int main(int argc, const char **argv) {
.Case("report", reportMain)
.Case("show", showMain)
.Cases("-h", "-help", "--help", helpMain)
+ .Cases("-version", "--version", versionMain)
if (Func) {
diff --git a/tools/llvm-cxxdump/llvm-cxxdump.cpp b/tools/llvm-cxxdump/llvm-cxxdump.cpp
index 447d55a..ef42211 100644
--- a/tools/llvm-cxxdump/llvm-cxxdump.cpp
+++ b/tools/llvm-cxxdump/llvm-cxxdump.cpp
@@ -204,9 +204,10 @@ static void dumpCXXData(const ObjectFile *Obj) {
StringRef SecContents;
if (error(Sec.getContents(SecContents)))
- uint64_t SymAddress, SymSize;
- if (error(Sym.getAddress(SymAddress)) || error(Sym.getSize(SymSize)))
+ uint64_t SymAddress;
+ if (error(Sym.getAddress(SymAddress)))
+ uint64_t SymSize = Sym.getSize();
uint64_t SecAddress = Sec.getAddress();
uint64_t SecSize = Sec.getSize();
uint64_t SymOffset = SymAddress - SecAddress;
diff --git a/tools/llvm-dwarfdump/CMakeLists.txt b/tools/llvm-dwarfdump/CMakeLists.txt
index 086b139..9a2e53f 100644
--- a/tools/llvm-dwarfdump/CMakeLists.txt
+++ b/tools/llvm-dwarfdump/CMakeLists.txt
@@ -7,3 +7,7 @@ set(LLVM_LINK_COMPONENTS
+ add_subdirectory(fuzzer)
diff --git a/tools/llvm-dwarfdump/fuzzer/CMakeLists.txt b/tools/llvm-dwarfdump/fuzzer/CMakeLists.txt
new file mode 100644
index 0000000..1de35a3
--- /dev/null
+++ b/tools/llvm-dwarfdump/fuzzer/CMakeLists.txt
@@ -0,0 +1,14 @@
+ DebugInfoDWARF
+ Object
+ Support
+ )
+ llvm-dwarfdump-fuzzer.cpp
+ )
+ LLVMFuzzer
+ )
diff --git a/tools/llvm-dwarfdump/fuzzer/llvm-dwarfdump-fuzzer.cpp b/tools/llvm-dwarfdump/fuzzer/llvm-dwarfdump-fuzzer.cpp
new file mode 100644
index 0000000..af0ac36
--- /dev/null
+++ b/tools/llvm-dwarfdump/fuzzer/llvm-dwarfdump-fuzzer.cpp
@@ -0,0 +1,34 @@
+//===-- llvm-dwarfdump-fuzzer.cpp - Fuzz the llvm-dwarfdump tool ----------===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file
+/// \brief This file implements a function that runs llvm-dwarfdump
+/// on a single input. This function is then linked into the Fuzzer library.
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/MemoryBuffer.h"
+using namespace llvm;
+using namespace object;
+extern "C" void LLVMFuzzerTestOneInput(uint8_t *data, size_t size) {
+  // View the raw fuzzer bytes as a StringRef and wrap them in a
+  // MemoryBuffer with an empty name.
+  StringRef Input(reinterpret_cast<const char *>(data), size);
+  std::unique_ptr<MemoryBuffer> Buff =
+      MemoryBuffer::getMemBuffer(Input, "", false);
+  // Inputs that cannot be opened as an object file are simply ignored.
+  ErrorOr<std::unique_ptr<ObjectFile>> ObjOrErr =
+      ObjectFile::createObjectFile(Buff->getMemBufferRef());
+  if (!ObjOrErr)
+    return;
+  ObjectFile &Obj = *ObjOrErr.get();
+  // Dump every section kind, discarding the text through nulls().
+  std::unique_ptr<DIContext> DICtx(new DWARFContextInMemory(Obj));
+  DICtx->dump(nulls(), DIDT_All);
diff --git a/tools/llvm-lto/CMakeLists.txt b/tools/llvm-lto/CMakeLists.txt
index 9adf629..3ea1aee 100644
--- a/tools/llvm-lto/CMakeLists.txt
+++ b/tools/llvm-lto/CMakeLists.txt
@@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
+ Target
diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp
index 9cd6587..9678c83 100644
--- a/tools/llvm-lto/llvm-lto.cpp
+++ b/tools/llvm-lto/llvm-lto.cpp
@@ -253,11 +253,9 @@ int main(int argc, char **argv) {
if (!OutputFilename.empty()) {
- size_t len = 0;
std::string ErrorInfo;
- const void *Code =
- CodeGen.compile(&len, DisableInline, DisableGVNLoadPRE,
- DisableLTOVectorization, ErrorInfo);
+ std::unique_ptr<MemoryBuffer> Code = CodeGen.compile(
+ DisableInline, DisableGVNLoadPRE, DisableLTOVectorization, ErrorInfo);
if (!Code) {
errs() << argv[0]
<< ": error compiling the code: " << ErrorInfo << "\n";
@@ -272,7 +270,7 @@ int main(int argc, char **argv) {
return 1;
- FileStream.write(reinterpret_cast<const char *>(Code), len);
+ FileStream.write(Code->getBufferStart(), Code->getBufferSize());
} else {
std::string ErrorInfo;
const char *OutputName = nullptr;
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 6a8b493..9a9185c 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -70,6 +70,9 @@ static cl::opt<bool>
PrintImmHex("print-imm-hex", cl::init(false),
cl::desc("Prefer hex format for immediate values"));
+static cl::list<std::string>
+DefineSymbol("defsym", cl::desc("Defines a symbol to be an integer constant"));
enum OutputFileType {
@@ -316,6 +319,26 @@ static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI,
return Error;
+/// Define every symbol requested with -defsym on the parser's streamer.
+///
+/// Each option value must have the form sym=value, where value parses as
+/// an integer (radix auto-detected by getAsInteger(0, ...)).
+/// \returns 0 on success, 1 after printing an error for the first
+/// malformed entry.
+static int fillCommandLineSymbols(MCAsmParser &Parser) {
+  auto &Context = Parser.getContext();
+  for (const auto &I : DefineSymbol) {
+    auto Pair = StringRef(I).split('=');
+    // Reject a missing value as well as a missing symbol name ("=5"),
+    // instead of asking the context for an unnamed symbol.
+    if (Pair.first.empty() || Pair.second.empty()) {
+      errs() << "error: defsym must be of the form: sym=value: " << I << "\n";
+      return 1;
+    }
+    int64_t Value;
+    if (Pair.second.getAsInteger(0, Value)) {
+      errs() << "error: Value is not an integer: " << Pair.second << "\n";
+      return 1;
+    }
+    auto Symbol = Context.getOrCreateSymbol(Pair.first);
+    Parser.getStreamer().EmitAssignment(Symbol,
+                                        MCConstantExpr::create(Value, Context));
+  }
+  return 0;
static int AssembleInput(const char *ProgName, const Target *TheTarget,
SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str,
MCAsmInfo &MAI, MCSubtargetInfo &STI,
@@ -331,6 +354,9 @@ static int AssembleInput(const char *ProgName, const Target *TheTarget,
return 1;
+ int SymbolResult = fillCommandLineSymbols(*Parser);
+ if(SymbolResult)
+ return SymbolResult;
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index f3197bb..8013f58 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -934,8 +934,7 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
S.Address = UnknownAddressOrSize;
if (PrintSize && isa<ELFObjectFileBase>(Obj)) {
symbol_iterator SymI = I;
- if (error(SymI->getSize(S.Size)))
- break;
+ S.Size = SymI->getSize();
if (PrintAddress && isa<ObjectFile>(Obj))
if (error(symbol_iterator(I)->getAddress(S.Address)))
diff --git a/tools/llvm-objdump/COFFDump.cpp b/tools/llvm-objdump/COFFDump.cpp
index 4a20b91..976a921 100644
--- a/tools/llvm-objdump/COFFDump.cpp
+++ b/tools/llvm-objdump/COFFDump.cpp
@@ -167,7 +167,7 @@ resolveSectionAndAddress(const COFFObjectFile *Obj, const SymbolRef &Sym,
if (std::error_code EC = Sym.getSection(iter))
return EC;
ResolvedSection = Obj->getCOFFSection(*iter);
- return object_error::success;
+ return std::error_code();
// Given a vector of relocations for a section and an offset into this section
@@ -182,7 +182,7 @@ static std::error_code resolveSymbol(const std::vector<RelocationRef> &Rels,
return EC;
if (Ofs == Offset) {
Sym = *I->getSymbol();
- return object_error::success;
+ return std::error_code();
return object_error::parse_failed;
@@ -204,7 +204,7 @@ getSectionContents(const COFFObjectFile *Obj,
return EC;
if (std::error_code EC = Obj->getSectionContents(Section, Contents))
return EC;
- return object_error::success;
+ return std::error_code();
// Given a vector of relocations for a section and an offset into this section
@@ -217,7 +217,7 @@ static std::error_code resolveSymbolName(const std::vector<RelocationRef> &Rels,
return EC;
if (std::error_code EC = Sym.getName(Name))
return EC;
- return object_error::success;
+ return std::error_code();
static void printCOFFSymbolAddress(llvm::raw_ostream &Out,
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index 84212c9..bf7451e 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -67,10 +67,6 @@ static cl::opt<bool> FullLeadingAddr("full-leading-addr",
static cl::opt<bool> NoLeadingAddr("no-leading-addr",
cl::desc("Print no leading address"));
-static cl::opt<bool>
- PrintImmHex("print-imm-hex",
- cl::desc("Use hex format for immediate values"));
cl::opt<bool> llvm::UniversalHeaders("universal-headers",
cl::desc("Print Mach-O universal headers "
"(requires -macho)"));
@@ -228,19 +224,19 @@ static uint64_t DumpDataInCode(const uint8_t *bytes, uint64_t Length,
if (Length >= 4) {
if (!NoShowRawInsn)
- DumpBytes(ArrayRef<uint8_t>(bytes, 4));
+ dumpBytes(ArrayRef<uint8_t>(bytes, 4), outs());
Value = bytes[3] << 24 | bytes[2] << 16 | bytes[1] << 8 | bytes[0];
outs() << "\t.long " << Value;
Size = 4;
} else if (Length >= 2) {
if (!NoShowRawInsn)
- DumpBytes(ArrayRef<uint8_t>(bytes, 2));
+ dumpBytes(ArrayRef<uint8_t>(bytes, 2), outs());
Value = bytes[1] << 8 | bytes[0];
outs() << "\t.short " << Value;
Size = 2;
} else {
if (!NoShowRawInsn)
- DumpBytes(ArrayRef<uint8_t>(bytes, 2));
+ dumpBytes(ArrayRef<uint8_t>(bytes, 2), outs());
Value = bytes[0];
outs() << "\t.byte " << Value;
Size = 1;
@@ -252,14 +248,14 @@ static uint64_t DumpDataInCode(const uint8_t *bytes, uint64_t Length,
if (!NoShowRawInsn)
- DumpBytes(ArrayRef<uint8_t>(bytes, 1));
+ dumpBytes(ArrayRef<uint8_t>(bytes, 1), outs());
Value = bytes[0];
outs() << "\t.byte " << format("%3u", Value) << "\t@ KIND_JUMP_TABLE8\n";
Size = 1;
if (!NoShowRawInsn)
- DumpBytes(ArrayRef<uint8_t>(bytes, 2));
+ dumpBytes(ArrayRef<uint8_t>(bytes, 2), outs());
Value = bytes[1] << 8 | bytes[0];
outs() << "\t.short " << format("%5u", Value & 0xffff)
<< "\t@ KIND_JUMP_TABLE16\n";
@@ -268,7 +264,7 @@ static uint64_t DumpDataInCode(const uint8_t *bytes, uint64_t Length,
if (!NoShowRawInsn)
- DumpBytes(ArrayRef<uint8_t>(bytes, 4));
+ dumpBytes(ArrayRef<uint8_t>(bytes, 4), outs());
Value = bytes[3] << 24 | bytes[2] << 16 | bytes[1] << 8 | bytes[0];
outs() << "\t.long " << Value;
if (Kind == MachO::DICE_KIND_JUMP_TABLE32)
@@ -281,8 +277,7 @@ static uint64_t DumpDataInCode(const uint8_t *bytes, uint64_t Length,
return Size;
-static void getSectionsAndSymbols(const MachO::mach_header Header,
- MachOObjectFile *MachOObj,
+static void getSectionsAndSymbols(MachOObjectFile *MachOObj,
std::vector<SectionRef> &Sections,
std::vector<SymbolRef> &Symbols,
SmallVectorImpl<uint64_t> &FoundFns,
@@ -300,10 +295,8 @@ static void getSectionsAndSymbols(const MachO::mach_header Header,
- MachOObjectFile::LoadCommandInfo Command =
- MachOObj->getFirstLoadCommandInfo();
bool BaseSegmentAddressSet = false;
- for (unsigned i = 0;; ++i) {
+ for (const auto &Command : MachOObj->load_commands()) {
if (Command.C.cmd == MachO::LC_FUNCTION_STARTS) {
// We found a function starts segment, parse the addresses for later
// consumption.
@@ -319,11 +312,6 @@ static void getSectionsAndSymbols(const MachO::mach_header Header,
BaseSegmentAddress = SLC.vmaddr;
- if (i == Header.ncmds - 1)
- break;
- else
- Command = MachOObj->getNextLoadCommandInfo(Command);
@@ -386,9 +374,7 @@ static void PrintIndirectSymbolTable(MachOObjectFile *O, bool verbose,
static void PrintIndirectSymbols(MachOObjectFile *O, bool verbose) {
- uint32_t LoadCommandCount = O->getHeader().ncmds;
- MachOObjectFile::LoadCommandInfo Load = O->getFirstLoadCommandInfo();
- for (unsigned I = 0;; ++I) {
+ for (const auto &Load : O->load_commands()) {
if (Load.C.cmd == MachO::LC_SEGMENT_64) {
MachO::segment_command_64 Seg = O->getSegment64LoadCommand(Load);
for (unsigned J = 0; J < Seg.nsects; ++J) {
@@ -446,10 +432,6 @@ static void PrintIndirectSymbols(MachOObjectFile *O, bool verbose) {
- if (I == LoadCommandCount - 1)
- break;
- else
- Load = O->getNextLoadCommandInfo(Load);
@@ -553,9 +535,8 @@ static void PrintLinkOptHints(MachOObjectFile *O) {
static void PrintDylibs(MachOObjectFile *O, bool JustId) {
- uint32_t LoadCommandCount = O->getHeader().ncmds;
- MachOObjectFile::LoadCommandInfo Load = O->getFirstLoadCommandInfo();
- for (unsigned I = 0;; ++I) {
+ unsigned Index = 0;
+ for (const auto &Load : O->load_commands()) {
if ((JustId && Load.C.cmd == MachO::LC_ID_DYLIB) ||
(!JustId && (Load.C.cmd == MachO::LC_ID_DYLIB ||
Load.C.cmd == MachO::LC_LOAD_DYLIB ||
@@ -595,13 +576,9 @@ static void PrintDylibs(MachOObjectFile *O, bool JustId) {
outs() << "LC_LOAD_UPWARD_DYLIB ";
outs() << "LC_??? ";
- outs() << "command " << I << "\n";
+ outs() << "command " << Index++ << "\n";
- if (I == LoadCommandCount - 1)
- break;
- else
- Load = O->getNextLoadCommandInfo(Load);
@@ -2132,9 +2109,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
// it returns a pointer to that string. Else it returns nullptr.
static const char *GuessCstringPointer(uint64_t ReferenceValue,
struct DisassembleInfo *info) {
- uint32_t LoadCommandCount = info->O->getHeader().ncmds;
- MachOObjectFile::LoadCommandInfo Load = info->O->getFirstLoadCommandInfo();
- for (unsigned I = 0;; ++I) {
+ for (const auto &Load : info->O->load_commands()) {
if (Load.C.cmd == MachO::LC_SEGMENT_64) {
MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load);
for (unsigned J = 0; J < Seg.nsects; ++J) {
@@ -2178,10 +2153,6 @@ static const char *GuessCstringPointer(uint64_t ReferenceValue,
- if (I == LoadCommandCount - 1)
- break;
- else
- Load = info->O->getNextLoadCommandInfo(Load);
return nullptr;
@@ -2192,11 +2163,9 @@ static const char *GuessCstringPointer(uint64_t ReferenceValue,
// symbol name being referenced by the stub or pointer.
static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
struct DisassembleInfo *info) {
- uint32_t LoadCommandCount = info->O->getHeader().ncmds;
- MachOObjectFile::LoadCommandInfo Load = info->O->getFirstLoadCommandInfo();
MachO::dysymtab_command Dysymtab = info->O->getDysymtabLoadCommand();
MachO::symtab_command Symtab = info->O->getSymtabLoadCommand();
- for (unsigned I = 0;; ++I) {
+ for (const auto &Load : info->O->load_commands()) {
if (Load.C.cmd == MachO::LC_SEGMENT_64) {
MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load);
for (unsigned J = 0; J < Seg.nsects; ++J) {
@@ -2266,10 +2235,6 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
- if (I == LoadCommandCount - 1)
- break;
- else
- Load = info->O->getNextLoadCommandInfo(Load);
return nullptr;
@@ -2356,9 +2321,7 @@ static uint64_t GuessPointerPointer(uint64_t ReferenceValue,
selref = false;
msgref = false;
cfstring = false;
- uint32_t LoadCommandCount = info->O->getHeader().ncmds;
- MachOObjectFile::LoadCommandInfo Load = info->O->getFirstLoadCommandInfo();
- for (unsigned I = 0;; ++I) {
+ for (const auto &Load : info->O->load_commands()) {
if (Load.C.cmd == MachO::LC_SEGMENT_64) {
MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load);
for (unsigned J = 0; J < Seg.nsects; ++J) {
@@ -2403,10 +2366,6 @@ static uint64_t GuessPointerPointer(uint64_t ReferenceValue,
// TODO: Look for LC_SEGMENT for 32-bit Mach-O files.
- if (I == LoadCommandCount - 1)
- break;
- else
- Load = info->O->getNextLoadCommandInfo(Load);
return 0;
@@ -6075,7 +6034,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
SmallVector<uint64_t, 8> FoundFns;
uint64_t BaseSegmentAddress;
- getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns,
+ getSectionsAndSymbols(MachOOF, Sections, Symbols, FoundFns,
// Sort the symbols by address, just in case they didn't come in that way.
@@ -6313,7 +6272,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
DebugOut, Annotations);
if (gotInst) {
if (!NoShowRawInsn) {
- DumpBytes(ArrayRef<uint8_t>( + Index, Size));
+ dumpBytes(ArrayRef<uint8_t>( + Index, Size), outs());
formatted_raw_ostream FormattedOS(outs());
@@ -6378,7 +6337,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
if (!NoShowRawInsn) {
outs() << "\t";
- DumpBytes(ArrayRef<uint8_t>( + Index, InstSize));
+ dumpBytes(ArrayRef<uint8_t>( + Index, InstSize), outs());
IP->printInst(&Inst, outs(), "", *STI);
outs() << "\n";
@@ -8367,15 +8326,12 @@ static void PrintLinkEditDataCommand(MachO::linkedit_data_command ld,
outs() << "\n";
-static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t ncmds,
- uint32_t filetype, uint32_t cputype,
- bool verbose) {
- if (ncmds == 0)
- return;
+static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t filetype,
+ uint32_t cputype, bool verbose) {
StringRef Buf = Obj->getData();
- MachOObjectFile::LoadCommandInfo Command = Obj->getFirstLoadCommandInfo();
- for (unsigned i = 0;; ++i) {
- outs() << "Load command " << i << "\n";
+ unsigned Index = 0;
+ for (const auto &Command : Obj->load_commands()) {
+ outs() << "Load command " << Index++ << "\n";
if (Command.C.cmd == MachO::LC_SEGMENT) {
MachO::segment_command SLC = Obj->getSegmentLoadCommand(Command);
const char *sg_segname = SLC.segname;
@@ -8494,14 +8450,10 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t ncmds,
// TODO: get and print the raw bytes of the load command.
// TODO: print all the other kinds of load commands.
- if (i == ncmds - 1)
- break;
- else
- Command = Obj->getNextLoadCommandInfo(Command);
-static void getAndPrintMachHeader(const MachOObjectFile *Obj, uint32_t &ncmds,
+static void getAndPrintMachHeader(const MachOObjectFile *Obj,
uint32_t &filetype, uint32_t &cputype,
bool verbose) {
if (Obj->is64Bit()) {
@@ -8509,7 +8461,6 @@ static void getAndPrintMachHeader(const MachOObjectFile *Obj, uint32_t &ncmds,
H_64 = Obj->getHeader64();
PrintMachHeader(H_64.magic, H_64.cputype, H_64.cpusubtype, H_64.filetype,
H_64.ncmds, H_64.sizeofcmds, H_64.flags, verbose);
- ncmds = H_64.ncmds;
filetype = H_64.filetype;
cputype = H_64.cputype;
} else {
@@ -8517,7 +8468,6 @@ static void getAndPrintMachHeader(const MachOObjectFile *Obj, uint32_t &ncmds,
H = Obj->getHeader();
PrintMachHeader(H.magic, H.cputype, H.cpusubtype, H.filetype, H.ncmds,
H.sizeofcmds, H.flags, verbose);
- ncmds = H.ncmds;
filetype = H.filetype;
cputype = H.cputype;
@@ -8525,11 +8475,10 @@ static void getAndPrintMachHeader(const MachOObjectFile *Obj, uint32_t &ncmds,
void llvm::printMachOFileHeader(const object::ObjectFile *Obj) {
const MachOObjectFile *file = dyn_cast<const MachOObjectFile>(Obj);
- uint32_t ncmds = 0;
uint32_t filetype = 0;
uint32_t cputype = 0;
- getAndPrintMachHeader(file, ncmds, filetype, cputype, !NonVerbose);
- PrintLoadCommands(file, ncmds, filetype, cputype, !NonVerbose);
+ getAndPrintMachHeader(file, filetype, cputype, !NonVerbose);
+ PrintLoadCommands(file, filetype, cputype, !NonVerbose);
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index e442ac0..1152a15 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -32,12 +32,14 @@
#include "llvm/MC/MCRelocationInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/Archive.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/GraphWriter.h"
@@ -147,6 +149,10 @@ static cl::alias
PrivateHeadersShort("p", cl::desc("Alias for --private-headers"),
+ llvm::PrintImmHex("print-imm-hex",
+ cl::desc("Use hex format for immediate values"));
static StringRef ToolName;
static int ReturnValue = EXIT_SUCCESS;
@@ -160,6 +166,12 @@ bool llvm::error(std::error_code EC) {
return true;
+static void report_error(StringRef File, std::error_code EC) {
+ assert(EC);
+ errs() << ToolName << ": '" << File << "': " << EC.message() << ".\n";
+ ReturnValue = EXIT_FAILURE;
static const Target *getTarget(const ObjectFile *Obj = nullptr) {
// Figure out the target triple.
llvm::Triple TheTriple("unknown-unknown-unknown");
@@ -194,19 +206,6 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) {
return TheTarget;
-void llvm::DumpBytes(ArrayRef<uint8_t> bytes) {
- static const char hex_rep[] = "0123456789abcdef";
- SmallString<64> output;
- for (char i: bytes) {
- output.push_back(hex_rep[(i & 0xF0) >> 4]);
- output.push_back(hex_rep[i & 0xF]);
- output.push_back(' ');
- }
- outs() << output.c_str();
bool llvm::RelocAddressLess(RelocationRef a, RelocationRef b) {
uint64_t a_addr, b_addr;
if (error(a.getOffset(a_addr))) return false;
@@ -214,6 +213,474 @@ bool llvm::RelocAddressLess(RelocationRef a, RelocationRef b) {
return a_addr < b_addr;
+namespace {
+class PrettyPrinter {
+ virtual ~PrettyPrinter(){}
+ virtual void printInst(MCInstPrinter &IP, const MCInst *MI,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &OS, StringRef Annot,
+ MCSubtargetInfo const &STI) {
+ outs() << format("%8" PRIx64 ":", Address);
+ if (!NoShowRawInsn) {
+ outs() << "\t";
+ dumpBytes(Bytes, outs());
+ }
+ IP.printInst(MI, outs(), "", STI);
+ }
+PrettyPrinter PrettyPrinterInst;
+class HexagonPrettyPrinter : public PrettyPrinter {
+ void printLead(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &OS) {
+ uint32_t opcode =
+ (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | Bytes[0];
+ OS << format("%8" PRIx64 ":", Address);
+ if (!NoShowRawInsn) {
+ OS << "\t";
+ dumpBytes(Bytes.slice(0, 4), OS);
+ OS << format("%08" PRIx32, opcode);
+ }
+ }
+ void printInst(MCInstPrinter &IP, const MCInst *MI,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &OS, StringRef Annot,
+ MCSubtargetInfo const &STI) override {
+ std::string Buffer;
+ {
+ raw_string_ostream TempStream(Buffer);
+ IP.printInst(MI, TempStream, "", STI);
+ }
+ StringRef Contents(Buffer);
+ // Split off bundle attributes
+ auto PacketBundle = Contents.rsplit('\n');
+ // Split off first instruction from the rest
+ auto HeadTail = PacketBundle.first.split('\n');
+ auto Preamble = " { ";
+ auto Separator = "";
+ while(!HeadTail.first.empty()) {
+ OS << Separator;
+ Separator = "\n";
+ printLead(Bytes, Address, OS);
+ OS << Preamble;
+ Preamble = " ";
+ StringRef Inst;
+ auto Duplex = HeadTail.first.split('\v');
+ if(!Duplex.second.empty()){
+ OS << Duplex.first;
+ OS << "; ";
+ Inst = Duplex.second;
+ }
+ else
+ Inst = HeadTail.first;
+ OS << Inst;
+ Bytes = Bytes.slice(4);
+ Address += 4;
+ HeadTail = HeadTail.second.split('\n');
+ }
+ OS << " } " << PacketBundle.second;
+ }
+HexagonPrettyPrinter HexagonPrettyPrinterInst;
+PrettyPrinter &selectPrettyPrinter(Triple const &Triple) {
+ switch(Triple.getArch()) {
+ default:
+ return PrettyPrinterInst;
+ case Triple::hexagon:
+ return HexagonPrettyPrinterInst;
+ }
+template <class ELFT>
+static const typename ELFObjectFile<ELFT>::Elf_Rel *
+getRel(const ELFFile<ELFT> &EF, DataRefImpl Rel) {
+ typedef typename ELFObjectFile<ELFT>::Elf_Rel Elf_Rel;
+ return EF.template getEntry<Elf_Rel>(Rel.d.a, Rel.d.b);
+template <class ELFT>
+static const typename ELFObjectFile<ELFT>::Elf_Rela *
+getRela(const ELFFile<ELFT> &EF, DataRefImpl Rela) {
+ typedef typename ELFObjectFile<ELFT>::Elf_Rela Elf_Rela;
+ return EF.template getEntry<Elf_Rela>(Rela.d.a, Rela.d.b);
+template <class ELFT>
+static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
+ DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) {
+ typedef typename ELFObjectFile<ELFT>::Elf_Sym Elf_Sym;
+ typedef typename ELFObjectFile<ELFT>::Elf_Shdr Elf_Shdr;
+ const ELFFile<ELFT> &EF = *Obj->getELFFile();
+ const Elf_Shdr *sec = EF.getSection(Rel.d.a);
+ uint8_t type;
+ StringRef res;
+ int64_t addend = 0;
+ uint16_t symbol_index = 0;
+ switch (sec->sh_type) {
+ default:
+ return object_error::parse_failed;
+ case ELF::SHT_REL: {
+ type = getRel(EF, Rel)->getType(EF.isMips64EL());
+ symbol_index = getRel(EF, Rel)->getSymbol(EF.isMips64EL());
+ // TODO: Read implicit addend from section data.
+ break;
+ }
+ case ELF::SHT_RELA: {
+ type = getRela(EF, Rel)->getType(EF.isMips64EL());
+ symbol_index = getRela(EF, Rel)->getSymbol(EF.isMips64EL());
+ addend = getRela(EF, Rel)->r_addend;
+ break;
+ }
+ }
+ const Elf_Sym *symb =
+ EF.template getEntry<Elf_Sym>(sec->sh_link, symbol_index);
+ StringRef Target;
+ const Elf_Shdr *SymSec = EF.getSection(symb);
+ if (symb->getType() == ELF::STT_SECTION) {
+ ErrorOr<StringRef> SecName = EF.getSectionName(SymSec);
+ if (std::error_code EC = SecName.getError())
+ return EC;
+ Target = *SecName;
+ } else {
+ ErrorOr<StringRef> SymName =
+ EF.getSymbolName(EF.getSection(sec->sh_link), symb);
+ if (!SymName)
+ return SymName.getError();
+ Target = *SymName;
+ }
+ switch (EF.getHeader()->e_machine) {
+ case ELF::EM_X86_64:
+ switch (type) {
+ case ELF::R_X86_64_PC8:
+ case ELF::R_X86_64_PC16:
+ case ELF::R_X86_64_PC32: {
+ std::string fmtbuf;
+ raw_string_ostream fmt(fmtbuf);
+ fmt << Target << (addend < 0 ? "" : "+") << addend << "-P";
+ fmt.flush();
+ Result.append(fmtbuf.begin(), fmtbuf.end());
+ } break;
+ case ELF::R_X86_64_8:
+ case ELF::R_X86_64_16:
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ case ELF::R_X86_64_64: {
+ std::string fmtbuf;
+ raw_string_ostream fmt(fmtbuf);
+ fmt << Target << (addend < 0 ? "" : "+") << addend;
+ fmt.flush();
+ Result.append(fmtbuf.begin(), fmtbuf.end());
+ } break;
+ default:
+ res = "Unknown";
+ }
+ break;
+ case ELF::EM_AARCH64: {
+ std::string fmtbuf;
+ raw_string_ostream fmt(fmtbuf);
+ fmt << Target;
+ if (addend != 0)
+ fmt << (addend < 0 ? "" : "+") << addend;
+ fmt.flush();
+ Result.append(fmtbuf.begin(), fmtbuf.end());
+ break;
+ }
+ case ELF::EM_386:
+ case ELF::EM_ARM:
+ case ELF::EM_MIPS:
+ res = Target;
+ break;
+ default:
+ res = "Unknown";
+ }
+ if (Result.empty())
+ Result.append(res.begin(), res.end());
+ return std::error_code();
+static std::error_code getRelocationValueString(const ELFObjectFileBase *Obj,
+ const RelocationRef &RelRef,
+ SmallVectorImpl<char> &Result) {
+ DataRefImpl Rel = RelRef.getRawDataRefImpl();
+ if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
+ return getRelocationValueString(ELF32LE, Rel, Result);
+ if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
+ return getRelocationValueString(ELF64LE, Rel, Result);
+ if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
+ return getRelocationValueString(ELF32BE, Rel, Result);
+ auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
+ return getRelocationValueString(ELF64BE, Rel, Result);
+static std::error_code getRelocationValueString(const COFFObjectFile *Obj,
+ const RelocationRef &Rel,
+ SmallVectorImpl<char> &Result) {
+ symbol_iterator SymI = Rel.getSymbol();
+ StringRef SymName;
+ if (std::error_code EC = SymI->getName(SymName))
+ return EC;
+ Result.append(SymName.begin(), SymName.end());
+ return std::error_code();
+static void printRelocationTargetName(const MachOObjectFile *O,
+ const MachO::any_relocation_info &RE,
+ raw_string_ostream &fmt) {
+ bool IsScattered = O->isRelocationScattered(RE);
+ // Target of a scattered relocation is an address. In the interest of
+ // generating pretty output, scan through the symbol table looking for a
+ // symbol that aligns with that address. If we find one, print it.
+ // Otherwise, we just print the hex address of the target.
+ if (IsScattered) {
+ uint32_t Val = O->getPlainRelocationSymbolNum(RE);
+ for (const SymbolRef &Symbol : O->symbols()) {
+ std::error_code ec;
+ uint64_t Addr;
+ StringRef Name;
+ if ((ec = Symbol.getAddress(Addr)))
+ report_fatal_error(ec.message());
+ if (Addr != Val)
+ continue;
+ if ((ec = Symbol.getName(Name)))
+ report_fatal_error(ec.message());
+ fmt << Name;
+ return;
+ }
+ // If we couldn't find a symbol that this relocation refers to, try
+ // to find a section beginning instead.
+ for (const SectionRef &Section : O->sections()) {
+ std::error_code ec;
+ StringRef Name;
+ uint64_t Addr = Section.getAddress();
+ if (Addr != Val)
+ continue;
+ if ((ec = Section.getName(Name)))
+ report_fatal_error(ec.message());
+ fmt << Name;
+ return;
+ }
+ fmt << format("0x%x", Val);
+ return;
+ }
+ StringRef S;
+ bool isExtern = O->getPlainRelocationExternal(RE);
+ uint64_t Val = O->getPlainRelocationSymbolNum(RE);
+ if (isExtern) {
+ symbol_iterator SI = O->symbol_begin();
+ advance(SI, Val);
+ SI->getName(S);
+ } else {
+ section_iterator SI = O->section_begin();
+ // Adjust for the fact that sections are 1-indexed.
+ advance(SI, Val - 1);
+ SI->getName(S);
+ }
+ fmt << S;
+static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
+ const RelocationRef &RelRef,
+ SmallVectorImpl<char> &Result) {
+ DataRefImpl Rel = RelRef.getRawDataRefImpl();
+ MachO::any_relocation_info RE = Obj->getRelocation(Rel);
+ unsigned Arch = Obj->getArch();
+ std::string fmtbuf;
+ raw_string_ostream fmt(fmtbuf);
+ unsigned Type = Obj->getAnyRelocationType(RE);
+ bool IsPCRel = Obj->getAnyRelocationPCRel(RE);
+ // Determine any addends that should be displayed with the relocation.
+ // These require decoding the relocation type, which is triple-specific.
+ // X86_64 has entirely custom relocation types.
+ if (Arch == Triple::x86_64) {
+ bool isPCRel = Obj->getAnyRelocationPCRel(RE);
+ switch (Type) {
+ case MachO::X86_64_RELOC_GOT_LOAD:
+ case MachO::X86_64_RELOC_GOT: {
+ printRelocationTargetName(Obj, RE, fmt);
+ fmt << "@GOT";
+ if (isPCRel)
+ fmt << "PCREL";
+ break;
+ }
+ case MachO::X86_64_RELOC_SUBTRACTOR: {
+ DataRefImpl RelNext = Rel;
+ Obj->moveRelocationNext(RelNext);
+ MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
+ // X86_64_RELOC_SUBTRACTOR must be followed by a relocation of type
+ // NOTE: Scattered relocations don't exist on x86_64.
+ unsigned RType = Obj->getAnyRelocationType(RENext);
+ if (RType != MachO::X86_64_RELOC_UNSIGNED)
+ report_fatal_error("Expected X86_64_RELOC_UNSIGNED after "
+ // The X86_64_RELOC_UNSIGNED contains the minuend symbol;
+ // X86_64_RELOC_SUBTRACTOR contains the subtrahend.
+ printRelocationTargetName(Obj, RENext, fmt);
+ fmt << "-";
+ printRelocationTargetName(Obj, RE, fmt);
+ break;
+ }
+ case MachO::X86_64_RELOC_TLV:
+ printRelocationTargetName(Obj, RE, fmt);
+ fmt << "@TLV";
+ if (isPCRel)
+ fmt << "P";
+ break;
+ case MachO::X86_64_RELOC_SIGNED_1:
+ printRelocationTargetName(Obj, RE, fmt);
+ fmt << "-1";
+ break;
+ case MachO::X86_64_RELOC_SIGNED_2:
+ printRelocationTargetName(Obj, RE, fmt);
+ fmt << "-2";
+ break;
+ case MachO::X86_64_RELOC_SIGNED_4:
+ printRelocationTargetName(Obj, RE, fmt);
+ fmt << "-4";
+ break;
+ default:
+ printRelocationTargetName(Obj, RE, fmt);
+ break;
+ }
+ // X86 and ARM share some relocation types in common.
+ } else if (Arch == Triple::x86 || Arch == Triple::arm ||
+ Arch == Triple::ppc) {
+ // Generic relocation types...
+ switch (Type) {
+ case MachO::GENERIC_RELOC_PAIR: // prints no info
+ return std::error_code();
+ DataRefImpl RelNext = Rel;
+ Obj->moveRelocationNext(RelNext);
+ MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
+ // X86 sect diff's must be followed by a relocation of type
+ unsigned RType = Obj->getAnyRelocationType(RENext);
+ if (RType != MachO::GENERIC_RELOC_PAIR)
+ report_fatal_error("Expected GENERIC_RELOC_PAIR after "
+ printRelocationTargetName(Obj, RE, fmt);
+ fmt << "-";
+ printRelocationTargetName(Obj, RENext, fmt);
+ break;
+ }
+ }
+ if (Arch == Triple::x86 || Arch == Triple::ppc) {
+ switch (Type) {
+ DataRefImpl RelNext = Rel;
+ Obj->moveRelocationNext(RelNext);
+ MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
+ // X86 sect diff's must be followed by a relocation of type
+ unsigned RType = Obj->getAnyRelocationType(RENext);
+ if (RType != MachO::GENERIC_RELOC_PAIR)
+ report_fatal_error("Expected GENERIC_RELOC_PAIR after "
+ printRelocationTargetName(Obj, RE, fmt);
+ fmt << "-";
+ printRelocationTargetName(Obj, RENext, fmt);
+ break;
+ }
+ case MachO::GENERIC_RELOC_TLV: {
+ printRelocationTargetName(Obj, RE, fmt);
+ fmt << "@TLV";
+ if (IsPCRel)
+ fmt << "P";
+ break;
+ }
+ default:
+ printRelocationTargetName(Obj, RE, fmt);
+ }
+ } else { // ARM-specific relocations
+ switch (Type) {
+ case MachO::ARM_RELOC_HALF:
+ // Half relocations steal a bit from the length field to encode
+ // whether this is an upper16 or a lower16 relocation.
+ bool isUpper = Obj->getAnyRelocationLength(RE) >> 1;
+ if (isUpper)
+ fmt << ":upper16:(";
+ else
+ fmt << ":lower16:(";
+ printRelocationTargetName(Obj, RE, fmt);
+ DataRefImpl RelNext = Rel;
+ Obj->moveRelocationNext(RelNext);
+ MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
+ // ARM half relocs must be followed by a relocation of type
+ unsigned RType = Obj->getAnyRelocationType(RENext);
+ if (RType != MachO::ARM_RELOC_PAIR)
+ report_fatal_error("Expected ARM_RELOC_PAIR after "
+ // NOTE: The half of the target virtual address is stashed in the
+ // address field of the secondary relocation, but we can't reverse
+ // engineer the constant offset from it without decoding the movw/movt
+ // instruction to find the other half in its immediate field.
+ // ARM_RELOC_HALF_SECTDIFF encodes the second section in the
+ // symbol/section pointer of the follow-on relocation.
+ if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) {
+ fmt << "-";
+ printRelocationTargetName(Obj, RENext, fmt);
+ }
+ fmt << ")";
+ break;
+ }
+ default: { printRelocationTargetName(Obj, RE, fmt); }
+ }
+ }
+ } else
+ printRelocationTargetName(Obj, RE, fmt);
+ fmt.flush();
+ Result.append(fmtbuf.begin(), fmtbuf.end());
+ return std::error_code();
+static std::error_code getRelocationValueString(const RelocationRef &Rel,
+ SmallVectorImpl<char> &Result) {
+ const ObjectFile *Obj = Rel.getObjectFile();
+ if (auto *ELF = dyn_cast<ELFObjectFileBase>(Obj))
+ return getRelocationValueString(ELF, Rel, Result);
+ if (auto *COFF = dyn_cast<COFFObjectFile>(Obj))
+ return getRelocationValueString(COFF, Rel, Result);
+ auto *MachO = cast<MachOObjectFile>(Obj);
+ return getRelocationValueString(MachO, Rel, Result);
static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
const Target *TheTarget = getTarget(Obj);
// getTarget() will have already issued a diagnostic if necessary, so
@@ -280,6 +747,8 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
<< '\n';
+ IP->setPrintImmHex(PrintImmHex);
+ PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "\t\t%016" PRIx64 ": " :
"\t\t\t%08" PRIx64 ": ";
@@ -352,11 +821,9 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
outs() << SegmentName << ",";
outs() << name << ':';
- // If the section has no symbols just insert a dummy one and disassemble
- // the whole section.
- if (Symbols.empty())
- Symbols.push_back(std::make_pair(0, name));
+ // If the section has no symbol at the start, just insert a dummy one.
+ if (Symbols.empty() || Symbols[0].first != 0)
+ Symbols.insert(Symbols.begin(), std::make_pair(0, name));
SmallString<40> Comments;
raw_svector_ostream CommentStream(Comments);
@@ -396,12 +863,9 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
SectionAddr + Index, DebugOut,
CommentStream)) {
- outs() << format("%8" PRIx64 ":", SectionAddr + Index);
- if (!NoShowRawInsn) {
- outs() << "\t";
- DumpBytes(ArrayRef<uint8_t>( + Index, Size));
- }
- IP->printInst(&Inst, outs(), "", *STI);
+ PIP.printInst(*IP, &Inst,
+ Bytes.slice(Index, Size),
+ SectionAddr + Index, outs(), "", *STI);
outs() << CommentStream.str();
outs() << "\n";
@@ -426,8 +890,8 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// Stop when rel_cur's address is past the current instruction.
if (addr >= Index + Size) break;
if (error(rel_cur->getTypeName(name))) goto skip_print_rel;
- if (error(rel_cur->getValueString(val))) goto skip_print_rel;
+ if (error(getRelocationValueString(*rel_cur, val)))
+ goto skip_print_rel;
outs() << format(, SectionAddr + addr) << name
<< "\t" << val << "\n";
@@ -467,7 +931,7 @@ void llvm::PrintRelocations(const ObjectFile *Obj) {
if (error(Reloc.getOffset(address)))
- if (error(Reloc.getValueString(valuestr)))
+ if (error(getRelocationValueString(Reloc, valuestr)))
outs() << format(, address) << " " << relocname << " "
<< valuestr << "\n";
@@ -608,22 +1072,23 @@ void llvm::PrintSymbolTable(const ObjectFile *o) {
for (const SymbolRef &Symbol : o->symbols()) {
- StringRef Name;
uint64_t Address;
SymbolRef::Type Type;
- uint64_t Size;
uint32_t Flags = Symbol.getFlags();
section_iterator Section = o->section_end();
- if (error(Symbol.getName(Name)))
- continue;
if (error(Symbol.getAddress(Address)))
if (error(Symbol.getType(Type)))
- if (error(Symbol.getSize(Size)))
- continue;
+ uint64_t Size = Symbol.getSize();
if (error(Symbol.getSection(Section)))
+ StringRef Name;
+ if (Type == SymbolRef::ST_Debug && Section != o->section_end()) {
+ Section->getName(Name);
+ } else if (error(Symbol.getName(Name))) {
+ continue;
+ }
bool Global = Flags & SymbolRef::SF_Global;
bool Weak = Flags & SymbolRef::SF_Weak;
@@ -632,9 +1097,7 @@ void llvm::PrintSymbolTable(const ObjectFile *o) {
bool Hidden = Flags & SymbolRef::SF_Hidden;
if (Common) {
- uint32_t Alignment;
- if (error(Symbol.getAlignment(Alignment)))
- Alignment = 0;
+ uint32_t Alignment = Symbol.getAlignment();
Address = Size;
Size = Alignment;
@@ -812,15 +1275,13 @@ static void DumpArchive(const Archive *a) {
if (std::error_code EC = ChildOrErr.getError()) {
// Ignore non-object files.
if (EC != object_error::invalid_file_type)
- errs() << ToolName << ": '" << a->getFileName() << "': " << EC.message()
- << ".\n";
+ report_error(a->getFileName(), EC);
if (ObjectFile *o = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
- errs() << ToolName << ": '" << a->getFileName() << "': "
- << "Unrecognized file type.\n";
+ report_error(a->getFileName(), object_error::invalid_file_type);
@@ -828,7 +1289,7 @@ static void DumpArchive(const Archive *a) {
static void DumpInput(StringRef file) {
// If file isn't stdin, check that it exists.
if (file != "-" && !sys::fs::exists(file)) {
- errs() << ToolName << ": '" << file << "': " << "No such file\n";
+ report_error(file, errc::no_such_file_or_directory);
@@ -843,7 +1304,7 @@ static void DumpInput(StringRef file) {
// Attempt to open the binary.
ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(file);
if (std::error_code EC = BinaryOrErr.getError()) {
- errs() << ToolName << ": '" << file << "': " << EC.message() << ".\n";
+ report_error(file, EC);
Binary &Binary = *BinaryOrErr.get().getBinary();
@@ -853,7 +1314,7 @@ static void DumpInput(StringRef file) {
else if (ObjectFile *o = dyn_cast<ObjectFile>(&Binary))
- errs() << ToolName << ": '" << file << "': " << "Unrecognized file type.\n";
+ report_error(file, object_error::invalid_file_type);
int main(int argc, char **argv) {
diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h
index bde72e0..b4d34f4 100644
--- a/tools/llvm-objdump/llvm-objdump.h
+++ b/tools/llvm-objdump/llvm-objdump.h
@@ -51,11 +51,11 @@ extern cl::opt<bool> SectionHeaders;
extern cl::opt<bool> SectionContents;
extern cl::opt<bool> SymbolTable;
extern cl::opt<bool> UnwindInfo;
+extern cl::opt<bool> PrintImmHex;
// Various helper functions.
bool error(std::error_code ec);
bool RelocAddressLess(object::RelocationRef a, object::RelocationRef b);
-void DumpBytes(ArrayRef<uint8_t> bytes);
void ParseInputMachO(StringRef Filename);
void printCOFFUnwindInfo(const object::COFFObjectFile* o);
void printMachOUnwindInfo(const object::MachOObjectFile* o);
diff --git a/tools/llvm-pdbdump/LinePrinter.h b/tools/llvm-pdbdump/LinePrinter.h
index c2a3ab6..b985e93 100644
--- a/tools/llvm-pdbdump/LinePrinter.h
+++ b/tools/llvm-pdbdump/LinePrinter.h
@@ -41,7 +41,7 @@ private:
void SetFilters(std::list<Regex> &List, Iter Begin, Iter End) {
for (; Begin != End; ++Begin)
- List.push_back(StringRef(*Begin));
+ List.emplace_back(StringRef(*Begin));
raw_ostream &OS;
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
index 1412111..4a1d5da 100644
--- a/tools/llvm-readobj/COFFDumper.cpp
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -142,7 +142,7 @@ std::error_code COFFDumper::resolveSymbolName(const coff_section *Section,
return EC;
if (std::error_code EC = Symbol.getName(Name))
return EC;
- return object_error::success;
+ return std::error_code();
static const EnumEntry<COFF::MachineTypes> ImageFileMachineType[] = {
diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp
index a20512f..0931cb7 100644
--- a/tools/llvm-readobj/ELFDumper.cpp
+++ b/tools/llvm-readobj/ELFDumper.cpp
@@ -141,19 +141,19 @@ static void
getSectionNameIndex(const ELFO &Obj, typename ELFO::Elf_Sym_Iter Symbol,
StringRef &SectionName, unsigned &SectionIndex) {
SectionIndex = Symbol->st_shndx;
- if (SectionIndex == SHN_UNDEF) {
+ if (Symbol->isUndefined())
SectionName = "Undefined";
- } else if (SectionIndex >= SHN_LOPROC && SectionIndex <= SHN_HIPROC) {
+ else if (Symbol->isProcessorSpecific())
SectionName = "Processor Specific";
- } else if (SectionIndex >= SHN_LOOS && SectionIndex <= SHN_HIOS) {
+ else if (Symbol->isOSSpecific())
SectionName = "Operating System Specific";
- } else if (SectionIndex > SHN_HIOS && SectionIndex < SHN_ABS) {
+ else if (Symbol->isReserved())
SectionName = "Reserved";
- } else if (SectionIndex == SHN_ABS) {
+ else if (Symbol->isAbsolute())
SectionName = "Absolute";
- } else if (SectionIndex == SHN_COMMON) {
+ else if (Symbol->isCommon())
SectionName = "Common";
- } else {
+ else {
if (SectionIndex == SHN_XINDEX)
SectionIndex = Obj.getSymbolTableIndex(&*Symbol);
assert(SectionIndex != SHN_XINDEX &&
@@ -705,26 +705,30 @@ void ELFDumper<ELFT>::printRelocation(const Elf_Shdr *Sec,
typename ELFO::Elf_Rela Rel) {
SmallString<32> RelocName;
Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName);
- StringRef SymbolName;
+ StringRef TargetName;
std::pair<const Elf_Shdr *, const Elf_Sym *> Sym =
Obj->getRelocationSymbol(Sec, &Rel);
- if (Sym.first)
- SymbolName = errorOrDefault(Obj->getSymbolName(Sym.first, Sym.second));
+ if (Sym.second && Sym.second->getType() == ELF::STT_SECTION) {
+ const Elf_Shdr *Sec = Obj->getSection(Sym.second);
+ ErrorOr<StringRef> SecName = Obj->getSectionName(Sec);
+ if (SecName)
+ TargetName = SecName.get();
+ } else if (Sym.first) {
+ TargetName = errorOrDefault(Obj->getSymbolName(Sym.first, Sym.second));
+ }
if (opts::ExpandRelocs) {
DictScope Group(W, "Relocation");
W.printHex("Offset", Rel.r_offset);
W.printNumber("Type", RelocName, (int)Rel.getType(Obj->isMips64EL()));
- W.printNumber("Symbol", SymbolName.size() > 0 ? SymbolName : "-",
+ W.printNumber("Symbol", TargetName.size() > 0 ? TargetName : "-",
W.printHex("Addend", Rel.r_addend);
} else {
raw_ostream& OS = W.startLine();
- OS << W.hex(Rel.r_offset)
- << " " << RelocName
- << " " << (SymbolName.size() > 0 ? SymbolName : "-")
- << " " << W.hex(Rel.r_addend)
- << "\n";
+ OS << W.hex(Rel.r_offset) << " " << RelocName << " "
+ << (TargetName.size() > 0 ? TargetName : "-") << " "
+ << W.hex(Rel.r_addend) << "\n";
@@ -991,11 +995,10 @@ void ELFDumper<ELFT>::printUnwindInfo() {
namespace {
-template <>
-void ELFDumper<ELFType<support::little, 2, false> >::printUnwindInfo() {
+template <> void ELFDumper<ELFType<support::little, false>>::printUnwindInfo() {
const unsigned Machine = Obj->getHeader()->e_machine;
if (Machine == EM_ARM) {
- ARM::EHABI::PrinterContext<ELFType<support::little, 2, false> > Ctx(W, Obj);
+ ARM::EHABI::PrinterContext<ELFType<support::little, false>> Ctx(W, Obj);
return Ctx.PrintUnwindInformation();
W.startLine() << "UnwindInfo not implemented.\n";
@@ -1075,8 +1078,7 @@ void ELFDumper<ELFT>::printAttributes() {
namespace {
-template <>
-void ELFDumper<ELFType<support::little, 2, false> >::printAttributes() {
+template <> void ELFDumper<ELFType<support::little, false>>::printAttributes() {
if (Obj->getHeader()->e_machine != EM_ARM) {
W.startLine() << "Attributes not implemented.\n";
diff --git a/tools/llvm-readobj/Win64EHDumper.cpp b/tools/llvm-readobj/Win64EHDumper.cpp
index f058632..b148c5d 100644
--- a/tools/llvm-readobj/Win64EHDumper.cpp
+++ b/tools/llvm-readobj/Win64EHDumper.cpp
@@ -152,7 +152,7 @@ static std::error_code resolveRelocation(const Dumper::Context &Ctx,
return EC;
ResolvedSection = Ctx.COFF.getCOFFSection(*SI);
- return object_error::success;
+ return std::error_code();
namespace llvm {
diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp
index e87f1e2..f857b2e 100644
--- a/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -47,6 +47,7 @@ InputFileList(cl::Positional, cl::ZeroOrMore,
enum ActionType {
+ AC_PrintObjectLineInfo,
@@ -61,6 +62,8 @@ Action(cl::desc("Action to perform:"),
"Load, link, and print line information for each function."),
clEnumValN(AC_PrintDebugLineInfo, "printdebugline",
"Load, link, and print line information for each function using the debug object"),
+ clEnumValN(AC_PrintObjectLineInfo, "printobjline",
+ "Like -printlineinfo but does not load the object first"),
clEnumValN(AC_Verify, "verify",
"Load, link and verify the resulting memory image."),
@@ -136,6 +139,11 @@ public:
// explicit cache flush, otherwise JIT code manipulations (like resolved
// relocations) will get to the data cache but not to the instruction cache.
virtual void invalidateInstructionCache();
+ void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
+ size_t Size) override {}
+ void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr,
+ size_t Size) override {}
uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
@@ -244,26 +252,69 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
std::unique_ptr<DIContext> Context(
new DWARFContextInMemory(*SymbolObj,LoadedObjInfo.get()));
+ // FIXME: This is generally useful. Figure out a place in lib/Object to
+ // put utility functions.
+ std::map<object::SectionRef, std::vector<uint64_t>> FuncAddresses;
+ if (!isa<ELFObjectFileBase>(SymbolObj)) {
+ for (object::SymbolRef Sym : SymbolObj->symbols()) {
+ object::SymbolRef::Type SymType;
+ if (Sym.getType(SymType))
+ continue;
+ if (SymType != object::SymbolRef::ST_Function)
+ continue;
+ uint64_t Addr;
+ if (Sym.getAddress(Addr))
+ continue;
+ object::section_iterator Sec = SymbolObj->section_end();
+ if (Sym.getSection(Sec))
+ continue;
+ std::vector<uint64_t> &Addrs = FuncAddresses[*Sec];
+ if (Addrs.empty()) {
+ uint64_t SecAddr = Sec->getAddress();
+ uint64_t SecSize = Sec->getSize();
+ Addrs.push_back(SecAddr + SecSize);
+ }
+ Addrs.push_back(Addr);
+ }
+ for (auto &Pair : FuncAddresses) {
+ std::vector<uint64_t> &Addrs = Pair.second;
+ array_pod_sort(Addrs.begin(), Addrs.end());
+ }
+ }
// Use symbol info to iterate functions in the object.
- for (object::symbol_iterator I = SymbolObj->symbol_begin(),
- E = SymbolObj->symbol_end();
- I != E; ++I) {
+ for (object::SymbolRef Sym : SymbolObj->symbols()) {
object::SymbolRef::Type SymType;
- if (I->getType(SymType)) continue;
+ if (Sym.getType(SymType))
+ continue;
if (SymType == object::SymbolRef::ST_Function) {
StringRef Name;
uint64_t Addr;
- uint64_t Size;
- if (I->getName(Name)) continue;
- if (I->getAddress(Addr)) continue;
- if (I->getSize(Size)) continue;
+ if (Sym.getName(Name))
+ continue;
+ if (Sym.getAddress(Addr))
+ continue;
+ uint64_t Size;
+ if (isa<ELFObjectFileBase>(SymbolObj)) {
+ Size = Sym.getSize();
+ } else {
+ object::section_iterator Sec = SymbolObj->section_end();
+ if (Sym.getSection(Sec))
+ continue;
+ const std::vector<uint64_t> &Addrs = FuncAddresses[*Sec];
+ auto AddrI = std::find(Addrs.begin(), Addrs.end(), Addr);
+ assert(AddrI != Addrs.end() && (AddrI + 1) != Addrs.end());
+ assert(*AddrI == Addr);
+ Size = *(AddrI + 1) - Addr;
+ }
// If we're not using the debug object, compute the address of the
// symbol in memory (rather than that in the unrelocated object file)
// and use that to query the DWARFContext.
if (!UseDebugObj && LoadObjects) {
object::section_iterator Sec(SymbolObj->section_end());
- I->getSection(Sec);
+ Sym.getSection(Sec);
StringRef SecName;
uint64_t SectionLoadAddress =
@@ -622,9 +673,11 @@ int main(int argc, char **argv) {
case AC_Execute:
return executeInput();
case AC_PrintDebugLineInfo:
- return printLineInfoForInput(true,true);
+ return printLineInfoForInput(/* LoadObjects */ true,/* UseDebugObj */ true);
case AC_PrintLineInfo:
- return printLineInfoForInput(true,false);
+ return printLineInfoForInput(/* LoadObjects */ true,/* UseDebugObj */false);
+ case AC_PrintObjectLineInfo:
+ return printLineInfoForInput(/* LoadObjects */false,/* UseDebugObj */false);
case AC_Verify:
return linkAndVerify();
diff --git a/tools/llvm-size/llvm-size.cpp b/tools/llvm-size/llvm-size.cpp
index 0e0dd59..c64c1d7 100644
--- a/tools/llvm-size/llvm-size.cpp
+++ b/tools/llvm-size/llvm-size.cpp
@@ -122,12 +122,10 @@ static void PrintDarwinSectionSizes(MachOObjectFile *MachO) {
fmt << "0x";
fmt << "%" << radix_fmt;
- uint32_t LoadCommandCount = MachO->getHeader().ncmds;
uint32_t Filetype = MachO->getHeader().filetype;
- MachOObjectFile::LoadCommandInfo Load = MachO->getFirstLoadCommandInfo();
uint64_t total = 0;
- for (unsigned I = 0;; ++I) {
+ for (const auto &Load : MachO->load_commands()) {
if (Load.C.cmd == MachO::LC_SEGMENT_64) {
MachO::segment_command_64 Seg = MachO->getSegment64LoadCommand(Load);
outs() << "Segment " << Seg.segname << ": "
@@ -181,10 +179,6 @@ static void PrintDarwinSectionSizes(MachOObjectFile *MachO) {
if (Seg.nsects != 0)
outs() << "\ttotal " << format(fmt.str().c_str(), sec_total) << "\n";
- if (I == LoadCommandCount - 1)
- break;
- else
- Load = MachO->getNextLoadCommandInfo(Load);
outs() << "total " << format(fmt.str().c_str(), total) << "\n";
@@ -194,14 +188,11 @@ static void PrintDarwinSectionSizes(MachOObjectFile *MachO) {
/// This is when used when @c OutputFormat is berkeley with a Mach-O file and
/// produces the same output as darwin's size(1) default output.
static void PrintDarwinSegmentSizes(MachOObjectFile *MachO) {
- uint32_t LoadCommandCount = MachO->getHeader().ncmds;
- MachOObjectFile::LoadCommandInfo Load = MachO->getFirstLoadCommandInfo();
uint64_t total_text = 0;
uint64_t total_data = 0;
uint64_t total_objc = 0;
uint64_t total_others = 0;
- for (unsigned I = 0;; ++I) {
+ for (const auto &Load : MachO->load_commands()) {
if (Load.C.cmd == MachO::LC_SEGMENT_64) {
MachO::segment_command_64 Seg = MachO->getSegment64LoadCommand(Load);
if (MachO->getHeader().filetype == MachO::MH_OBJECT) {
@@ -255,10 +246,6 @@ static void PrintDarwinSegmentSizes(MachOObjectFile *MachO) {
total_others += Seg.vmsize;
- if (I == LoadCommandCount - 1)
- break;
- else
- Load = MachO->getNextLoadCommandInfo(Load);
uint64_t total = total_text + total_data + total_objc + total_others;
diff --git a/tools/llvm-symbolizer/LLVMSymbolize.cpp b/tools/llvm-symbolizer/LLVMSymbolize.cpp
index afb7cc8..b8fa838 100644
--- a/tools/llvm-symbolizer/LLVMSymbolize.cpp
+++ b/tools/llvm-symbolizer/LLVMSymbolize.cpp
@@ -113,9 +113,11 @@ void ModuleInfo::addSymbol(const SymbolRef &Symbol, DataExtractor *OpdExtractor,
// occupies the memory range up to the following symbol.
if (isa<MachOObjectFile>(Module))
SymbolSize = 0;
- else if (error(Symbol.getSize(SymbolSize)) ||
- SymbolSize == UnknownAddressOrSize)
- return;
+ else {
+ SymbolSize = Symbol.getSize();
+ if (SymbolSize == UnknownAddressOrSize)
+ return;
+ }
StringRef SymbolName;
if (error(Symbol.getName(SymbolName)))
diff --git a/tools/lto/CMakeLists.txt b/tools/lto/CMakeLists.txt
index 87f42e8..c479fa9 100644
--- a/tools/lto/CMakeLists.txt
+++ b/tools/lto/CMakeLists.txt
@@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS
+ Target
diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp
index d6ceebe..e55708c 100644
--- a/tools/lto/lto.cpp
+++ b/tools/lto/lto.cpp
@@ -73,7 +73,22 @@ static void lto_initialize() {
+namespace {
+// This derived class owns the native object file. This helps implement the
+// libLTO API semantics, which require that the code generator owns the object
+// file.
+struct LibLTOCodeGenerator : LTOCodeGenerator {
+ LibLTOCodeGenerator() {}
+ LibLTOCodeGenerator(std::unique_ptr<LLVMContext> Context)
+ : LTOCodeGenerator(std::move(Context)) {}
+ std::unique_ptr<MemoryBuffer> NativeObjectFile;
// Convert the subtarget features into a string to pass to LTOCodeGenerator.
@@ -235,11 +250,10 @@ static lto_code_gen_t createCodeGen(bool InLocalContext) {
TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
- LTOCodeGenerator *CodeGen =
- InLocalContext ? new LTOCodeGenerator(make_unique<LLVMContext>())
- : new LTOCodeGenerator();
- if (CodeGen)
- CodeGen->setTargetOptions(Options);
+ LibLTOCodeGenerator *CodeGen =
+ InLocalContext ? new LibLTOCodeGenerator(make_unique<LLVMContext>())
+ : new LibLTOCodeGenerator();
+ CodeGen->setTargetOptions(Options);
return wrap(CodeGen);
@@ -304,9 +318,13 @@ bool lto_codegen_write_merged_modules(lto_code_gen_t cg, const char *path) {
const void *lto_codegen_compile(lto_code_gen_t cg, size_t *length) {
- return unwrap(cg)->compile(length, DisableInline,
- DisableGVNLoadPRE, DisableLTOVectorization,
- sLastErrorString);
+ LibLTOCodeGenerator *CG = unwrap(cg);
+ CG->NativeObjectFile = CG->compile(DisableInline, DisableGVNLoadPRE,
+ DisableLTOVectorization, sLastErrorString);
+ if (!CG->NativeObjectFile)
+ return nullptr;
+ *length = CG->NativeObjectFile->getBufferSize();
+ return CG->NativeObjectFile->getBufferStart();
bool lto_codegen_optimize(lto_code_gen_t cg) {
@@ -318,7 +336,12 @@ bool lto_codegen_optimize(lto_code_gen_t cg) {
const void *lto_codegen_compile_optimized(lto_code_gen_t cg, size_t *length) {
- return unwrap(cg)->compileOptimized(length, sLastErrorString);
+ LibLTOCodeGenerator *CG = unwrap(cg);
+ CG->NativeObjectFile = CG->compileOptimized(sLastErrorString);
+ if (!CG->NativeObjectFile)
+ return nullptr;
+ *length = CG->NativeObjectFile->getBufferSize();
+ return CG->NativeObjectFile->getBufferStart();
bool lto_codegen_compile_to_file(lto_code_gen_t cg, const char **name) {
diff --git a/tools/macho-dump/macho-dump.cpp b/tools/macho-dump/macho-dump.cpp
index 604f93a..39c2860 100644
--- a/tools/macho-dump/macho-dump.cpp
+++ b/tools/macho-dump/macho-dump.cpp
@@ -340,7 +340,7 @@ DumpDylibID(const MachOObjectFile &Obj,
static int DumpLoadCommand(const MachOObjectFile &Obj,
- MachOObjectFile::LoadCommandInfo &LCI) {
+ const MachOObjectFile::LoadCommandInfo &LCI) {
switch (LCI.C.cmd) {
case MachO::LC_SEGMENT:
return DumpSegmentCommand(Obj, LCI);
@@ -369,9 +369,8 @@ static int DumpLoadCommand(const MachOObjectFile &Obj,
static int DumpLoadCommand(const MachOObjectFile &Obj, unsigned Index,
- MachOObjectFile::LoadCommandInfo &LCI) {
+ const MachOObjectFile::LoadCommandInfo &LCI) {
outs() << " # Load Command " << Index << "\n"
<< " (('command', " << LCI.C.cmd << ")\n"
<< " ('size', " << LCI.C.cmdsize << ")\n";
@@ -423,16 +422,11 @@ int main(int argc, char **argv) {
// Print the load commands.
int Res = 0;
- MachOObjectFile::LoadCommandInfo Command =
- InputObject->getFirstLoadCommandInfo();
+ unsigned Index = 0;
outs() << "('load_commands', [\n";
- for (unsigned i = 0; ; ++i) {
- if (DumpLoadCommand(*InputObject, i, Command))
- break;
- if (i == Header->ncmds - 1)
+ for (const auto &Load : InputObject->load_commands()) {
+ if (DumpLoadCommand(*InputObject, Index++, Load))
- Command = InputObject->getNextLoadCommandInfo(Command);
outs() << "])\n";
diff --git a/tools/obj2yaml/coff2yaml.cpp b/tools/obj2yaml/coff2yaml.cpp
index 5baa644..1e29107 100644
--- a/tools/obj2yaml/coff2yaml.cpp
+++ b/tools/obj2yaml/coff2yaml.cpp
@@ -271,5 +271,5 @@ std::error_code coff2yaml(raw_ostream &Out, const object::COFFObjectFile &Obj) {
yaml::Output Yout(Out);
Yout << Dumper.getYAMLObj();
- return object::object_error::success;
+ return std::error_code();
diff --git a/tools/obj2yaml/elf2yaml.cpp b/tools/obj2yaml/elf2yaml.cpp
index 8ce70bc..eeabb0f 100644
--- a/tools/obj2yaml/elf2yaml.cpp
+++ b/tools/obj2yaml/elf2yaml.cpp
@@ -367,7 +367,7 @@ static std::error_code elf2yaml(raw_ostream &Out,
yaml::Output Yout(Out);
Yout << *YAML;
- return object::object_error::success;
+ return std::error_code();
std::error_code elf2yaml(raw_ostream &Out, const object::ObjectFile &Obj) {
diff --git a/tools/yaml2obj/yaml2elf.cpp b/tools/yaml2obj/yaml2elf.cpp
index 3386588..772b5b9 100644
--- a/tools/yaml2obj/yaml2elf.cpp
+++ b/tools/yaml2obj/yaml2elf.cpp
@@ -552,10 +552,10 @@ int yaml2elf(yaml::Input &YIn, raw_ostream &Out) {
return 1;
using object::ELFType;
- typedef ELFType<support::little, 8, true> LE64;
- typedef ELFType<support::big, 8, true> BE64;
- typedef ELFType<support::little, 4, false> LE32;
- typedef ELFType<support::big, 4, false> BE32;
+ typedef ELFType<support::little, true> LE64;
+ typedef ELFType<support::big, true> BE64;
+ typedef ELFType<support::little, false> LE32;
+ typedef ELFType<support::big, false> BE32;
if (is64Bit(Doc)) {
if (isLittleEndian(Doc))
return ELFState<LE64>::writeELF(Out, Doc);
diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp
index 498f50c..e4398f0 100644
--- a/unittests/ADT/APIntTest.cpp
+++ b/unittests/ADT/APIntTest.cpp
@@ -134,6 +134,12 @@ TEST(APIntTest, i1) {
EXPECT_EQ(one, neg_one);
EXPECT_EQ(two, neg_two);
+ // Min/max signed values.
+ EXPECT_TRUE(zero.isMaxSignedValue());
+ EXPECT_FALSE(one.isMaxSignedValue());
+ EXPECT_FALSE(zero.isMinSignedValue());
+ EXPECT_TRUE(one.isMinSignedValue());
// Additions.
EXPECT_EQ(two, one + one);
EXPECT_EQ(zero, neg_one + one);
diff --git a/unittests/ADT/ArrayRefTest.cpp b/unittests/ADT/ArrayRefTest.cpp
index 6955036..9ad32d5 100644
--- a/unittests/ADT/ArrayRefTest.cpp
+++ b/unittests/ADT/ArrayRefTest.cpp
@@ -40,7 +40,7 @@ TEST(ArrayRefTest, AllocatorCopy) {
static const uint16_t Words2[] = { 11, 4003, 67, 64000, 13 };
ArrayRef<uint16_t> Array2 = makeArrayRef(Words2, 5);
ArrayRef<uint16_t> Array1c = Array1.copy(Alloc);
- ArrayRef<uint16_t> Array2c = Array2.copy(Alloc);;
+ ArrayRef<uint16_t> Array2c = Array2.copy(Alloc);
diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp
index b0f01b2..23a9128 100644
--- a/unittests/ADT/TripleTest.cpp
+++ b/unittests/ADT/TripleTest.cpp
@@ -800,6 +800,14 @@ TEST(TripleTest, getARMCPUForArch) {
EXPECT_STREQ("arm1176jzf-s", Triple.getARMCPUForArch());
+ llvm::Triple Triple("thumbv6-unknown-freebsd");
+ EXPECT_STREQ("arm1176jzf-s", Triple.getARMCPUForArch());
+ }
+ {
+ llvm::Triple Triple("armebv6-unknown-freebsd");
+ EXPECT_STREQ("arm1176jzf-s", Triple.getARMCPUForArch());
+ }
+ {
llvm::Triple Triple("arm--win32");
EXPECT_STREQ("cortex-a9", Triple.getARMCPUForArch());
diff --git a/unittests/CodeGen/DIEHashTest.cpp b/unittests/CodeGen/DIEHashTest.cpp
index c1b1160..8e78f0c 100644
--- a/unittests/CodeGen/DIEHashTest.cpp
+++ b/unittests/CodeGen/DIEHashTest.cpp
@@ -36,7 +36,7 @@ TEST_F(DIEHashTest, Data1) {
DIEHash Hash;
DIE Die(dwarf::DW_TAG_base_type);
DIEInteger Size(4);
- Die.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Size);
+ Die.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Size);
uint64_t MD5Res = Hash.computeTypeSignature(Die);
ASSERT_EQ(0x1AFE116E83701108ULL, MD5Res);
@@ -45,11 +45,11 @@ TEST_F(DIEHashTest, Data1) {
TEST_F(DIEHashTest, TrivialType) {
DIE Unnamed(dwarf::DW_TAG_structure_type);
DIEInteger One(1);
- Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &One);
+ Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
// Line and file number are ignored.
- Unnamed.addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, &One);
- Unnamed.addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, &One);
+ Unnamed.addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
+ Unnamed.addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, One);
uint64_t MD5Res = DIEHash().computeTypeSignature(Unnamed);
// The exact same hash GCC produces for this DIE.
@@ -61,8 +61,8 @@ TEST_F(DIEHashTest, NamedType) {
DIE Foo(dwarf::DW_TAG_structure_type);
DIEInteger One(1);
DIEString FooStr = getString("foo");
- Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FooStr);
- Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &One);
+ Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+ Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
uint64_t MD5Res = DIEHash().computeTypeSignature(Foo);
@@ -77,15 +77,15 @@ TEST_F(DIEHashTest, NamespacedType) {
auto Space = make_unique<DIE>(dwarf::DW_TAG_namespace);
DIEInteger One(1);
DIEString SpaceStr = getString("space");
- Space->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &SpaceStr);
+ Space->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, SpaceStr);
// DW_AT_declaration is ignored.
- Space->addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, &One);
+ Space->addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, One);
// sibling?
auto Foo = make_unique<DIE>(dwarf::DW_TAG_structure_type);
DIEString FooStr = getString("foo");
- Foo->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FooStr);
- Foo->addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &One);
+ Foo->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+ Foo->addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
DIE &N = *Foo;
@@ -101,24 +101,24 @@ TEST_F(DIEHashTest, NamespacedType) {
TEST_F(DIEHashTest, TypeWithMember) {
DIE Unnamed(dwarf::DW_TAG_structure_type);
DIEInteger Four(4);
- Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Four);
+ Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Four);
DIE Int(dwarf::DW_TAG_base_type);
DIEString IntStr = getString("int");
- Int.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &IntStr);
- Int.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Four);
+ Int.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, IntStr);
+ Int.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Four);
DIEInteger Five(5);
- Int.addValue(dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, &Five);
+ Int.addValue(dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Five);
DIEEntry IntRef(Int);
auto Member = make_unique<DIE>(dwarf::DW_TAG_member);
DIEString MemberStr = getString("member");
- Member->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &MemberStr);
+ Member->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemberStr);
DIEInteger Zero(0);
Member->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
- &Zero);
- Member->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &IntRef);
+ Zero);
+ Member->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IntRef);
@@ -131,34 +131,34 @@ TEST_F(DIEHashTest, TypeWithMember) {
TEST_F(DIEHashTest, ReusedType) {
DIE Unnamed(dwarf::DW_TAG_structure_type);
DIEInteger Eight(8);
- Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Eight);
+ Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
DIEInteger Four(4);
DIE Int(dwarf::DW_TAG_base_type);
DIEString IntStr = getString("int");
- Int.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &IntStr);
- Int.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Four);
+ Int.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, IntStr);
+ Int.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Four);
DIEInteger Five(5);
- Int.addValue(dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, &Five);
+ Int.addValue(dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Five);
DIEEntry IntRef(Int);
auto Mem1 = make_unique<DIE>(dwarf::DW_TAG_member);
DIEString Mem1Str = getString("mem1");
- Mem1->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &Mem1Str);
+ Mem1->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, Mem1Str);
DIEInteger Zero(0);
Mem1->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
- &Zero);
- Mem1->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &IntRef);
+ Zero);
+ Mem1->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IntRef);
auto Mem2 = make_unique<DIE>(dwarf::DW_TAG_member);
DIEString Mem2Str = getString("mem2");
- Mem2->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &Mem2Str);
+ Mem2->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, Mem2Str);
Mem2->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
- &Four);
- Mem2->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &IntRef);
+ Four);
+ Mem2->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IntRef);
@@ -171,15 +171,15 @@ TEST_F(DIEHashTest, ReusedType) {
TEST_F(DIEHashTest, RecursiveType) {
DIE Foo(dwarf::DW_TAG_structure_type);
DIEInteger One(1);
- Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &One);
+ Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
DIEString FooStr = getString("foo");
- Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FooStr);
+ Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
DIEString MemStr = getString("mem");
- Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &MemStr);
+ Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
DIEEntry FooRef(Foo);
- Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &FooRef);
+ Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRef);
// DW_AT_external and DW_AT_declaration are ignored anyway, so skip them.
@@ -193,23 +193,23 @@ TEST_F(DIEHashTest, RecursiveType) {
TEST_F(DIEHashTest, Pointer) {
DIE Foo(dwarf::DW_TAG_structure_type);
DIEInteger Eight(8);
- Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Eight);
+ Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
DIEString FooStr = getString("foo");
- Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FooStr);
+ Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
DIEString MemStr = getString("mem");
- Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &MemStr);
+ Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
DIEInteger Zero(0);
- Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, &Zero);
+ Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, Zero);
DIE FooPtr(dwarf::DW_TAG_pointer_type);
- FooPtr.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Eight);
+ FooPtr.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
DIEEntry FooRef(Foo);
- FooPtr.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &FooRef);
+ FooPtr.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRef);
DIEEntry FooPtrRef(FooPtr);
- Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &FooPtrRef);
+ Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooPtrRef);
@@ -222,27 +222,27 @@ TEST_F(DIEHashTest, Pointer) {
TEST_F(DIEHashTest, Reference) {
DIE Foo(dwarf::DW_TAG_structure_type);
DIEInteger Eight(8);
- Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Eight);
+ Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
DIEString FooStr = getString("foo");
- Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FooStr);
+ Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
DIEString MemStr = getString("mem");
- Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &MemStr);
+ Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
DIEInteger Zero(0);
- Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, &Zero);
+ Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, Zero);
DIE FooRef(dwarf::DW_TAG_reference_type);
- FooRef.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Eight);
+ FooRef.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
DIEEntry FooEntry(Foo);
- FooRef.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &FooEntry);
+ FooRef.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooEntry);
DIE FooRefConst(dwarf::DW_TAG_const_type);
DIEEntry FooRefRef(FooRef);
- FooRefConst.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &FooRefRef);
+ FooRefConst.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRefRef);
DIEEntry FooRefConstRef(FooRefConst);
- Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &FooRefConstRef);
+ Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRefConstRef);
@@ -255,27 +255,27 @@ TEST_F(DIEHashTest, Reference) {
TEST_F(DIEHashTest, RValueReference) {
DIE Foo(dwarf::DW_TAG_structure_type);
DIEInteger Eight(8);
- Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Eight);
+ Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
DIEString FooStr = getString("foo");
- Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FooStr);
+ Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
DIEString MemStr = getString("mem");
- Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &MemStr);
+ Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
DIEInteger Zero(0);
- Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, &Zero);
+ Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, Zero);
DIE FooRef(dwarf::DW_TAG_rvalue_reference_type);
- FooRef.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Eight);
+ FooRef.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
DIEEntry FooEntry(Foo);
- FooRef.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &FooEntry);
+ FooRef.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooEntry);
DIE FooRefConst(dwarf::DW_TAG_const_type);
DIEEntry FooRefRef(FooRef);
- FooRefConst.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &FooRefRef);
+ FooRefConst.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRefRef);
DIEEntry FooRefConstRef(FooRefConst);
- Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &FooRefConstRef);
+ Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRefConstRef);
@@ -288,24 +288,24 @@ TEST_F(DIEHashTest, RValueReference) {
TEST_F(DIEHashTest, PtrToMember) {
DIE Foo(dwarf::DW_TAG_structure_type);
DIEInteger Eight(8);
- Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Eight);
+ Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
DIEString FooStr = getString("foo");
- Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FooStr);
+ Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
DIEString MemStr = getString("mem");
- Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &MemStr);
+ Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
DIEInteger Zero(0);
- Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, &Zero);
+ Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, Zero);
DIE PtrToFooMem(dwarf::DW_TAG_ptr_to_member_type);
DIEEntry FooEntry(Foo);
- PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &FooEntry);
+ PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooEntry);
PtrToFooMem.addValue(dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
- &FooEntry);
+ FooEntry);
DIEEntry PtrToFooMemRef(PtrToFooMem);
- Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &PtrToFooMemRef);
+ Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PtrToFooMemRef);
@@ -329,27 +329,27 @@ TEST_F(DIEHashTest, PtrToMemberDeclDefMatch) {
uint64_t MD5ResDecl;
DIE Bar(dwarf::DW_TAG_structure_type);
- Bar.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &BarStr);
- Bar.addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, &One);
+ Bar.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, BarStr);
+ Bar.addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, One);
DIE Foo(dwarf::DW_TAG_structure_type);
- Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Eight);
- Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FooStr);
+ Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+ Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
- Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &MemStr);
+ Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
- &Zero);
+ Zero);
DIE PtrToFooMem(dwarf::DW_TAG_ptr_to_member_type);
DIEEntry BarEntry(Bar);
- PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &BarEntry);
+ PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, BarEntry);
DIEEntry FooEntry(Foo);
PtrToFooMem.addValue(dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
- &FooEntry);
+ FooEntry);
DIEEntry PtrToFooMemRef(PtrToFooMem);
- Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &PtrToFooMemRef);
+ Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PtrToFooMemRef);
@@ -358,27 +358,27 @@ TEST_F(DIEHashTest, PtrToMemberDeclDefMatch) {
uint64_t MD5ResDef;
DIE Bar(dwarf::DW_TAG_structure_type);
- Bar.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &BarStr);
- Bar.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &One);
+ Bar.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, BarStr);
+ Bar.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
DIE Foo(dwarf::DW_TAG_structure_type);
- Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Eight);
- Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FooStr);
+ Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+ Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
- Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &MemStr);
+ Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
- &Zero);
+ Zero);
DIE PtrToFooMem(dwarf::DW_TAG_ptr_to_member_type);
DIEEntry BarEntry(Bar);
- PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &BarEntry);
+ PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, BarEntry);
DIEEntry FooEntry(Foo);
PtrToFooMem.addValue(dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
- &FooEntry);
+ FooEntry);
DIEEntry PtrToFooMemRef(PtrToFooMem);
- Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &PtrToFooMemRef);
+ Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PtrToFooMemRef);
@@ -402,26 +402,26 @@ TEST_F(DIEHashTest, PtrToMemberDeclDefMisMatch) {
uint64_t MD5ResDecl;
DIE Bar(dwarf::DW_TAG_structure_type);
- Bar.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &BarStr);
- Bar.addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, &One);
+ Bar.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, BarStr);
+ Bar.addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, One);
DIE Foo(dwarf::DW_TAG_structure_type);
- Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Eight);
- Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FooStr);
+ Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+ Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
- Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &MemStr);
+ Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
- &Zero);
+ Zero);
DIE PtrToFooMem(dwarf::DW_TAG_ptr_to_member_type);
DIEEntry BarEntry(Bar);
- PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &BarEntry);
+ PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, BarEntry);
PtrToFooMem.addValue(dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
- &BarEntry);
+ BarEntry);
DIEEntry PtrToFooMemRef(PtrToFooMem);
- Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &PtrToFooMemRef);
+ Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PtrToFooMemRef);
@@ -430,26 +430,26 @@ TEST_F(DIEHashTest, PtrToMemberDeclDefMisMatch) {
uint64_t MD5ResDef;
DIE Bar(dwarf::DW_TAG_structure_type);
- Bar.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &BarStr);
- Bar.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &One);
+ Bar.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, BarStr);
+ Bar.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
DIE Foo(dwarf::DW_TAG_structure_type);
- Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Eight);
- Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FooStr);
+ Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+ Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
- Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &MemStr);
+ Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
- &Zero);
+ Zero);
DIE PtrToFooMem(dwarf::DW_TAG_ptr_to_member_type);
DIEEntry BarEntry(Bar);
- PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &BarEntry);
+ PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, BarEntry);
PtrToFooMem.addValue(dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
- &BarEntry);
+ BarEntry);
DIEEntry PtrToFooMemRef(PtrToFooMem);
- Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &PtrToFooMemRef);
+ Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PtrToFooMemRef);
@@ -473,23 +473,23 @@ TEST_F(DIEHashTest, RefUnnamedType) {
DIEString MemStr = getString("mem");
DIE Unnamed(dwarf::DW_TAG_structure_type);
- Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &One);
+ Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
DIE Foo(dwarf::DW_TAG_structure_type);
- Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Eight);
- Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FooStr);
+ Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+ Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
- Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &MemStr);
- Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, &Zero);
+ Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
+ Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, Zero);
DIE UnnamedPtr(dwarf::DW_TAG_pointer_type);
- UnnamedPtr.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Eight);
+ UnnamedPtr.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
DIEEntry UnnamedRef(Unnamed);
- UnnamedPtr.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &UnnamedRef);
+ UnnamedPtr.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, UnnamedRef);
DIEEntry UnnamedPtrRef(UnnamedPtr);
- Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &UnnamedPtrRef);
+ Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, UnnamedPtrRef);
@@ -502,12 +502,12 @@ TEST_F(DIEHashTest, RefUnnamedType) {
TEST_F(DIEHashTest, NestedType) {
DIE Unnamed(dwarf::DW_TAG_structure_type);
DIEInteger One(1);
- Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &One);
+ Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
auto Foo = make_unique<DIE>(dwarf::DW_TAG_structure_type);
DIEString FooStr = getString("foo");
- Foo->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FooStr);
- Foo->addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &One);
+ Foo->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+ Foo->addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
@@ -521,11 +521,11 @@ TEST_F(DIEHashTest, NestedType) {
TEST_F(DIEHashTest, MemberFunc) {
DIE Unnamed(dwarf::DW_TAG_structure_type);
DIEInteger One(1);
- Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &One);
+ Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
auto Func = make_unique<DIE>(dwarf::DW_TAG_subprogram);
DIEString FuncStr = getString("func");
- Func->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FuncStr);
+ Func->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FuncStr);
@@ -542,21 +542,21 @@ TEST_F(DIEHashTest, MemberFuncFlag) {
DIE A(dwarf::DW_TAG_structure_type);
DIEInteger One(1);
DIEString AStr = getString("A");
- A.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &AStr);
- A.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &One);
- A.addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, &One);
- A.addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, &One);
+ A.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, AStr);
+ A.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+ A.addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
+ A.addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, One);
auto Func = make_unique<DIE>(dwarf::DW_TAG_subprogram);
DIEString FuncStr = getString("func");
DIEString FuncLinkage = getString("_ZN1A4funcEv");
DIEInteger Two(2);
- Func->addValue(dwarf::DW_AT_external, dwarf::DW_FORM_flag_present, &One);
- Func->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FuncStr);
- Func->addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, &One);
- Func->addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, &Two);
- Func->addValue(dwarf::DW_AT_linkage_name, dwarf::DW_FORM_strp, &FuncLinkage);
- Func->addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, &One);
+ Func->addValue(dwarf::DW_AT_external, dwarf::DW_FORM_flag_present, One);
+ Func->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FuncStr);
+ Func->addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
+ Func->addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, Two);
+ Func->addValue(dwarf::DW_AT_linkage_name, dwarf::DW_FORM_strp, FuncLinkage);
+ Func->addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, One);
@@ -575,35 +575,35 @@ TEST_F(DIEHashTest, MemberSdata) {
DIE A(dwarf::DW_TAG_structure_type);
DIEInteger One(1);
DIEString AStr = getString("A");
- A.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &AStr);
- A.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &One);
- A.addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, &One);
- A.addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, &One);
+ A.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, AStr);
+ A.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+ A.addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
+ A.addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, One);
DIEInteger Four(4);
DIEInteger Five(5);
DIEString FStr = getString("int");
DIE IntTyDIE(dwarf::DW_TAG_base_type);
- IntTyDIE.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Four);
- IntTyDIE.addValue(dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, &Five);
- IntTyDIE.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FStr);
+ IntTyDIE.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Four);
+ IntTyDIE.addValue(dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Five);
+ IntTyDIE.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FStr);
DIEEntry IntTy(IntTyDIE);
auto PITyDIE = make_unique<DIE>(dwarf::DW_TAG_const_type);
- PITyDIE->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &IntTy);
+ PITyDIE->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IntTy);
auto PI = make_unique<DIE>(dwarf::DW_TAG_member);
DIEString PIStr = getString("PI");
DIEInteger Two(2);
DIEInteger NegThree(-3);
- PI->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &PIStr);
- PI->addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, &One);
- PI->addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, &Two);
- PI->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &PITy);
- PI->addValue(dwarf::DW_AT_external, dwarf::DW_FORM_flag_present, &One);
- PI->addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, &One);
- PI->addValue(dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, &NegThree);
+ PI->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, PIStr);
+ PI->addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
+ PI->addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, Two);
+ PI->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PITy);
+ PI->addValue(dwarf::DW_AT_external, dwarf::DW_FORM_flag_present, One);
+ PI->addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, One);
+ PI->addValue(dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, NegThree);
@@ -620,32 +620,32 @@ TEST_F(DIEHashTest, MemberBlock) {
DIE A(dwarf::DW_TAG_structure_type);
DIEInteger One(1);
DIEString AStr = getString("A");
- A.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &AStr);
- A.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &One);
- A.addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, &One);
- A.addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, &One);
+ A.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, AStr);
+ A.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+ A.addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
+ A.addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, One);
DIEInteger Four(4);
DIEString FStr = getString("float");
auto FloatTyDIE = make_unique<DIE>(dwarf::DW_TAG_base_type);
- FloatTyDIE->addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, &Four);
- FloatTyDIE->addValue(dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, &Four);
- FloatTyDIE->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &FStr);
+ FloatTyDIE->addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Four);
+ FloatTyDIE->addValue(dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Four);
+ FloatTyDIE->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FStr);
DIEEntry FloatTy(*FloatTyDIE);
auto PITyDIE = make_unique<DIE>(dwarf::DW_TAG_const_type);
- PITyDIE->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &FloatTy);
+ PITyDIE->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FloatTy);
auto PI = make_unique<DIE>(dwarf::DW_TAG_member);
DIEString PIStr = getString("PI");
DIEInteger Two(2);
- PI->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, &PIStr);
- PI->addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, &One);
- PI->addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, &Two);
- PI->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, &PITy);
- PI->addValue(dwarf::DW_AT_external, dwarf::DW_FORM_flag_present, &One);
- PI->addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, &One);
+ PI->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, PIStr);
+ PI->addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
+ PI->addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, Two);
+ PI->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PITy);
+ PI->addValue(dwarf::DW_AT_external, dwarf::DW_FORM_flag_present, One);
+ PI->addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, One);
DIEBlock PIBlock;
DIEInteger Blk1(0xc3);
@@ -653,10 +653,10 @@ TEST_F(DIEHashTest, MemberBlock) {
DIEInteger Blk3(0x48);
DIEInteger Blk4(0x40);
- PIBlock.addValue((dwarf::Attribute)0, dwarf::DW_FORM_data1, &Blk1);
- PIBlock.addValue((dwarf::Attribute)0, dwarf::DW_FORM_data1, &Blk2);
- PIBlock.addValue((dwarf::Attribute)0, dwarf::DW_FORM_data1, &Blk3);
- PIBlock.addValue((dwarf::Attribute)0, dwarf::DW_FORM_data1, &Blk4);
+ PIBlock.addValue((dwarf::Attribute)0, dwarf::DW_FORM_data1, Blk1);
+ PIBlock.addValue((dwarf::Attribute)0, dwarf::DW_FORM_data1, Blk2);
+ PIBlock.addValue((dwarf::Attribute)0, dwarf::DW_FORM_data1, Blk3);
+ PIBlock.addValue((dwarf::Attribute)0, dwarf::DW_FORM_data1, Blk4);
PI->addValue(dwarf::DW_AT_const_value, dwarf::DW_FORM_block1, &PIBlock);
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
index a7c9ae0..c7d4dd7 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
+++ b/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
@@ -127,6 +127,8 @@ protected:
+ SupportedArchs.push_back(Triple::mips64);
+ SupportedArchs.push_back(Triple::mips64el);
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h b/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
index 901f142..0749a1d 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
+++ b/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
@@ -298,6 +298,8 @@ protected:
+ SupportedArchs.push_back(Triple::mips64);
+ SupportedArchs.push_back(Triple::mips64el);
diff --git a/unittests/Support/YAMLIOTest.cpp b/unittests/Support/YAMLIOTest.cpp
index 7248124..e7affa1 100644
--- a/unittests/Support/YAMLIOTest.cpp
+++ b/unittests/Support/YAMLIOTest.cpp
@@ -2074,3 +2074,123 @@ TEST(YAMLIO, TestEmptyStringSucceedsForSequence) {
+struct FlowMap {
+ llvm::StringRef str1, str2, str3;
+ FlowMap(llvm::StringRef str1, llvm::StringRef str2, llvm::StringRef str3)
+ : str1(str1), str2(str2), str3(str3) {}
+struct FlowSeq {
+ llvm::StringRef str;
+ FlowSeq(llvm::StringRef S) : str(S) {}
+ FlowSeq() = default;
+namespace llvm {
+namespace yaml {
+ template <>
+ struct MappingTraits<FlowMap> {
+ static void mapping(IO &io, FlowMap &fm) {
+ io.mapRequired("str1", fm.str1);
+ io.mapRequired("str2", fm.str2);
+ io.mapRequired("str3", fm.str3);
+ }
+ static const bool flow = true;
+ };
+template <>
+struct ScalarTraits<FlowSeq> {
+ static void output(const FlowSeq &value, void*, llvm::raw_ostream &out) {
+ out << value.str;
+ }
+ static StringRef input(StringRef scalar, void*, FlowSeq &value) {
+ value.str = scalar;
+ return "";
+ }
+ static bool mustQuote(StringRef S) { return false; }
+TEST(YAMLIO, TestWrapFlow) {
+ std::string out;
+ llvm::raw_string_ostream ostr(out);
+ FlowMap Map("This is str1", "This is str2", "This is str3");
+ std::vector<FlowSeq> Seq;
+ Seq.emplace_back("This is str1");
+ Seq.emplace_back("This is str2");
+ Seq.emplace_back("This is str3");
+ {
+ // 15 is just below the total length of the first mapping field.
+ // We should wrap at every element.
+ Output yout(ostr, nullptr, 15);
+ yout << Map;
+ ostr.flush();
+ EXPECT_EQ(out,
+ "---\n"
+ "{ str1: This is str1, \n"
+ " str2: This is str2, \n"
+ " str3: This is str3 }\n"
+ "...\n");
+ out.clear();
+ yout << Seq;
+ ostr.flush();
+ EXPECT_EQ(out,
+ "---\n"
+ "[ This is str1, \n"
+ " This is str2, \n"
+ " This is str3 ]\n"
+ "...\n");
+ out.clear();
+ }
+ {
+ // 25 will allow the second field to be output on the first line.
+ Output yout(ostr, nullptr, 25);
+ yout << Map;
+ ostr.flush();
+ EXPECT_EQ(out,
+ "---\n"
+ "{ str1: This is str1, str2: This is str2, \n"
+ " str3: This is str3 }\n"
+ "...\n");
+ out.clear();
+ yout << Seq;
+ ostr.flush();
+ EXPECT_EQ(out,
+ "---\n"
+ "[ This is str1, This is str2, \n"
+ " This is str3 ]\n"
+ "...\n");
+ out.clear();
+ }
+ {
+ // 0 means no wrapping.
+ Output yout(ostr, nullptr, 0);
+ yout << Map;
+ ostr.flush();
+ EXPECT_EQ(out,
+ "---\n"
+ "{ str1: This is str1, str2: This is str2, str3: This is str3 }\n"
+ "...\n");
+ out.clear();
+ yout << Seq;
+ ostr.flush();
+ EXPECT_EQ(out,
+ "---\n"
+ "[ This is str1, This is str2, This is str3 ]\n"
+ "...\n");
+ out.clear();
+ }
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
index 8fe2f88..9b9ffb0 100644
--- a/utils/FileCheck/FileCheck.cpp
+++ b/utils/FileCheck/FileCheck.cpp
@@ -946,10 +946,7 @@ static bool ReadCheckFile(SourceMgr &SM,
// Okay, add the string we captured to the output vector and move on.
- CheckStrings.push_back(CheckString(P,
- UsedPrefix,
- PatternLoc,
- CheckTy));
+ CheckStrings.emplace_back(P, UsedPrefix, PatternLoc, CheckTy);
std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
DagNotMatches = ImplicitNegativeChecks;
@@ -957,10 +954,9 @@ static bool ReadCheckFile(SourceMgr &SM,
// Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
// prefix as a filler for the error message.
if (!DagNotMatches.empty()) {
- CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
- *CheckPrefixes.begin(),
- SMLoc::getFromPointer(,
- Check::CheckEOF));
+ CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
+ SMLoc::getFromPointer(,
+ Check::CheckEOF);
std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index d8f2619..0d7c5ff 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -310,11 +310,16 @@ struct MatchableInfo {
/// The suboperand index within SrcOpName, or -1 for the entire operand.
int SubOpIdx;
+ /// Whether the token is "isolated", i.e., it is preceded and followed
+ /// by separators.
+ bool IsIsolatedToken;
/// Register record if this token is singleton register.
Record *SingletonReg;
- explicit AsmOperand(StringRef T) : Token(T), Class(nullptr), SubOpIdx(-1),
- SingletonReg(nullptr) {}
+ explicit AsmOperand(bool IsIsolatedToken, StringRef T)
+ : Token(T), Class(nullptr), SubOpIdx(-1),
+ IsIsolatedToken(IsIsolatedToken), SingletonReg(nullptr) {}
/// ResOperand - This represents a single operand in the result instruction
@@ -572,6 +577,7 @@ struct MatchableInfo {
void tokenizeAsmString(const AsmMatcherInfo &Info);
+ void addAsmOperand(size_t Start, size_t End);
/// SubtargetFeatureInfo - Helper class for storing information on a subtarget
@@ -811,6 +817,19 @@ void MatchableInfo::initialize(const AsmMatcherInfo &Info,
DepMask ? !DepMask->getValue()->getAsUnquotedString().empty() : false;
+/// Append an AsmOperand for the given substring of AsmString.
+void MatchableInfo::addAsmOperand(size_t Start, size_t End) {
+ StringRef String = AsmString;
+ StringRef Separators = "[]*! \t,";
+ // Look for separators before and after to figure out if this token is
+ // isolated. Accept '$$' as that's how we escape '$'.
+ bool IsIsolatedToken =
+ (!Start || Separators.find(String[Start - 1]) != StringRef::npos ||
+ String.substr(Start - 1, 2) == "$$") &&
+ (End >= String.size() || Separators.find(String[End]) != StringRef::npos);
+ AsmOperands.push_back(AsmOperand(IsIsolatedToken, String.slice(Start, End)));
/// tokenizeAsmString - Tokenize a simplified assembly string.
void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info) {
StringRef String = AsmString;
@@ -826,28 +845,28 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info) {
case '\t':
case ',':
if (InTok) {
- AsmOperands.push_back(AsmOperand(String.slice(Prev, i)));
+ addAsmOperand(Prev, i);
InTok = false;
if (!isspace(String[i]) && String[i] != ',')
- AsmOperands.push_back(AsmOperand(String.substr(i, 1)));
+ addAsmOperand(i, i + 1);
Prev = i + 1;
case '\\':
if (InTok) {
- AsmOperands.push_back(AsmOperand(String.slice(Prev, i)));
+ addAsmOperand(Prev, i);
InTok = false;
assert(i != String.size() && "Invalid quoted character");
- AsmOperands.push_back(AsmOperand(String.substr(i, 1)));
+ addAsmOperand(i, i + 1);
Prev = i + 1;
case '$': {
if (InTok) {
- AsmOperands.push_back(AsmOperand(String.slice(Prev, i)));
+ addAsmOperand(Prev, i);
InTok = false;
@@ -860,7 +879,7 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info) {
StringRef::iterator End = std::find(String.begin() + i, String.end(),'}');
assert(End != String.end() && "Missing brace in operand reference!");
size_t EndPos = End - String.begin();
- AsmOperands.push_back(AsmOperand(String.slice(i, EndPos+1)));
+ addAsmOperand(i, EndPos+1);
Prev = EndPos + 1;
i = EndPos;
@@ -869,7 +888,7 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info) {
case '.':
if (!Info.AsmParser->getValueAsBit("MnemonicContainsDot")) {
if (InTok)
- AsmOperands.push_back(AsmOperand(String.slice(Prev, i)));
+ addAsmOperand(Prev, i);
Prev = i;
InTok = true;
@@ -880,7 +899,7 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info) {
if (InTok && Prev != String.size())
- AsmOperands.push_back(AsmOperand(String.substr(Prev)));
+ addAsmOperand(Prev, StringRef::npos);
// The first token of the instruction is the mnemonic, which must be a
// simple string, not a $foo variable or a singleton register.
@@ -962,6 +981,12 @@ extractSingletonRegisterForAsmOperand(unsigned OperandNo,
const AsmMatcherInfo &Info,
std::string &RegisterPrefix) {
StringRef Tok = AsmOperands[OperandNo].Token;
+ // If this token is not an isolated token, i.e., it isn't separated from
+ // other tokens (e.g. with whitespace), don't interpret it as a register name.
+ if (!AsmOperands[OperandNo].IsIsolatedToken)
+ return;
if (RegisterPrefix.empty()) {
std::string LoweredTok = Tok.lower();
if (const CodeGenRegister *Reg = Info.Target.getRegisterByName(LoweredTok))
@@ -1224,8 +1249,8 @@ void AsmMatcherInfo::buildOperandClasses() {
CI->Kind = ClassInfo::UserClass0 + Index;
ListInit *Supers = Rec->getValueAsListInit("SuperClasses");
- for (unsigned i = 0, e = Supers->getSize(); i != e; ++i) {
- DefInit *DI = dyn_cast<DefInit>(Supers->getElement(i));
+ for (Init *I : Supers->getValues()) {
+ DefInit *DI = dyn_cast<DefInit>(I);
if (!DI) {
PrintError(Rec->getLoc(), "Invalid super class reference!");
@@ -1510,7 +1535,7 @@ buildInstructionOperandReference(MatchableInfo *II,
// Insert remaining suboperands after AsmOpIdx in II->AsmOperands.
StringRef Token = Op->Token; // save this in case Op gets moved
for (unsigned SI = 1, SE = Operands[Idx].MINumOperands; SI != SE; ++SI) {
- MatchableInfo::AsmOperand NewAsmOp(Token);
+ MatchableInfo::AsmOperand NewAsmOp(/*IsIsolatedToken=*/true, Token);
NewAsmOp.SubOpIdx = SI;
II->AsmOperands.insert(II->AsmOperands.begin()+AsmOpIdx+SI, NewAsmOp);
@@ -1772,7 +1797,7 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
// Add the converter row for this instruction.
- ConversionTable.push_back(std::vector<uint8_t>());
+ ConversionTable.emplace_back();
@@ -2136,8 +2161,7 @@ static void emitMatchTokenString(CodeGenTarget &Target,
std::vector<StringMatcher::StringPair> Matches;
for (const auto &CI : Infos) {
if (CI.Kind == ClassInfo::Token)
- Matches.push_back(
- StringMatcher::StringPair(CI.ValueName, "return " + CI.Name + ";"));
+ Matches.emplace_back(CI.ValueName, "return " + CI.Name + ";");
OS << "static MatchClassKind matchTokenString(StringRef Name) {\n";
@@ -2159,9 +2183,8 @@ static void emitMatchRegisterName(CodeGenTarget &Target, Record *AsmParser,
if (Reg.TheDef->getValueAsString("AsmName").empty())
- Matches.push_back(
- StringMatcher::StringPair(Reg.TheDef->getValueAsString("AsmName"),
- "return " + utostr(Reg.EnumValue) + ";"));
+ Matches.emplace_back(Reg.TheDef->getValueAsString("AsmName"),
+ "return " + utostr(Reg.EnumValue) + ";");
OS << "static unsigned MatchRegisterName(StringRef Name) {\n";
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index 389889a..8163f68 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -1105,9 +1105,8 @@ AsmWriterEmitter::AsmWriterEmitter(RecordKeeper &R) : Records(R), Target(R) {
Record *AsmWriter = Target.getAsmWriter();
for (const CodeGenInstruction *I : Target.instructions())
if (!I->AsmString.empty() && I->TheDef->getName() != "PHI")
- Instructions.push_back(
- AsmWriterInst(*I, AsmWriter->getValueAsInt("Variant"),
- AsmWriter->getValueAsInt("PassSubtarget")));
+ Instructions.emplace_back(*I, AsmWriter->getValueAsInt("Variant"),
+ AsmWriter->getValueAsInt("PassSubtarget"));
// Get the instruction numbering.
NumberedInstructions = &Target.getInstructionsByEnumValue();
diff --git a/utils/TableGen/AsmWriterInst.cpp b/utils/TableGen/AsmWriterInst.cpp
index a66b1a0..9541887 100644
--- a/utils/TableGen/AsmWriterInst.cpp
+++ b/utils/TableGen/AsmWriterInst.cpp
@@ -163,27 +163,22 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant,
if (VarName.empty()) {
// Just a modifier, pass this into PrintSpecial.
- Operands.push_back(AsmWriterOperand("PrintSpecial",
- ~0U,
- ~0U,
- Modifier,
- PassSubtarget));
+ Operands.emplace_back("PrintSpecial", ~0U, ~0U, Modifier,
+ PassSubtarget);
} else {
// Otherwise, normal operand.
unsigned OpNo = CGI.Operands.getOperandNamed(VarName);
CGIOperandList::OperandInfo OpInfo = CGI.Operands[OpNo];
unsigned MIOp = OpInfo.MIOperandNo;
- Operands.push_back(AsmWriterOperand(OpInfo.PrinterMethodName,
- OpNo, MIOp, Modifier,
- PassSubtarget));
+ Operands.emplace_back(OpInfo.PrinterMethodName, OpNo, MIOp, Modifier,
+ PassSubtarget);
LastEmitted = VarEnd;
- Operands.push_back(AsmWriterOperand("return;",
- AsmWriterOperand::isLiteralStatementOperand));
+ Operands.emplace_back("return;", AsmWriterOperand::isLiteralStatementOperand);
/// MatchesAllButOneOp - If this instruction is exactly identical to the
diff --git a/utils/TableGen/CTagsEmitter.cpp b/utils/TableGen/CTagsEmitter.cpp
index bbed92a1..35f4ad6 100644
--- a/utils/TableGen/CTagsEmitter.cpp
+++ b/utils/TableGen/CTagsEmitter.cpp
@@ -24,8 +24,6 @@ using namespace llvm;
#define DEBUG_TYPE "ctags-emitter"
-namespace llvm { extern SourceMgr SrcMgr; }
namespace {
class Tag {
@@ -61,11 +59,7 @@ private:
SMLoc CTagsEmitter::locate(const Record *R) {
ArrayRef<SMLoc> Locs = R->getLoc();
- if (Locs.empty()) {
- SMLoc NullLoc;
- return NullLoc;
- }
- return Locs.front();
+ return !Locs.empty() ? Locs.front() : SMLoc();
void CTagsEmitter::run(raw_ostream &OS) {
@@ -82,9 +76,8 @@ void CTagsEmitter::run(raw_ostream &OS) {
std::sort(Tags.begin(), Tags.end());
OS << "!_TAG_FILE_FORMAT\t1\t/original ctags format/\n";
OS << "!_TAG_FILE_SORTED\t1\t/0=unsorted, 1=sorted, 2=foldcase/\n";
- for (std::vector<Tag>::const_iterator I = Tags.begin(), E = Tags.end();
- I != E; ++I)
- I->emit(OS);
+ for (const Tag &T : Tags)
+ T.emit(OS);
namespace llvm {
diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp
index 051a7e9..c7519b3 100644
--- a/utils/TableGen/CallingConvEmitter.cpp
+++ b/utils/TableGen/CallingConvEmitter.cpp
@@ -69,7 +69,7 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) {
<< std::string(CC->getName().size()+13, ' ')
<< "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n";
// Emit all of the actions, in order.
- for (unsigned i = 0, e = CCActions->getSize(); i != e; ++i) {
+ for (unsigned i = 0, e = CCActions->size(); i != e; ++i) {
O << "\n";
EmitAction(CCActions->getElementAsRecord(i), 2, O);
@@ -87,7 +87,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
if (Action->isSubClassOf("CCIfType")) {
ListInit *VTs = Action->getValueAsListInit("VTs");
- for (unsigned i = 0, e = VTs->getSize(); i != e; ++i) {
+ for (unsigned i = 0, e = VTs->size(); i != e; ++i) {
Record *VT = VTs->getElementAsRecord(i);
if (i != 0) O << " ||\n " << IndentStr;
O << "LocVT == " << getEnumName(getValueType(VT));
@@ -111,14 +111,14 @@ void CallingConvEmitter::EmitAction(Record *Action,
<< IndentStr << " return false;\n";
} else if (Action->isSubClassOf("CCAssignToReg")) {
ListInit *RegList = Action->getValueAsListInit("RegList");
- if (RegList->getSize() == 1) {
+ if (RegList->size() == 1) {
O << IndentStr << "if (unsigned Reg = State.AllocateReg(";
O << getQualifiedName(RegList->getElementAsRecord(0)) << ")) {\n";
} else {
O << IndentStr << "static const MCPhysReg RegList" << ++Counter
<< "[] = {\n";
O << IndentStr << " ";
- for (unsigned i = 0, e = RegList->getSize(); i != e; ++i) {
+ for (unsigned i = 0, e = RegList->size(); i != e; ++i) {
if (i != 0) O << ", ";
O << getQualifiedName(RegList->getElementAsRecord(i));
@@ -133,11 +133,10 @@ void CallingConvEmitter::EmitAction(Record *Action,
} else if (Action->isSubClassOf("CCAssignToRegWithShadow")) {
ListInit *RegList = Action->getValueAsListInit("RegList");
ListInit *ShadowRegList = Action->getValueAsListInit("ShadowRegList");
- if (ShadowRegList->getSize() >0 &&
- ShadowRegList->getSize() != RegList->getSize())
+ if (!ShadowRegList->empty() && ShadowRegList->size() != RegList->size())
PrintFatalError("Invalid length of list of shadowed registers");
- if (RegList->getSize() == 1) {
+ if (RegList->size() == 1) {
O << IndentStr << "if (unsigned Reg = State.AllocateReg(";
O << getQualifiedName(RegList->getElementAsRecord(0));
O << ", " << getQualifiedName(ShadowRegList->getElementAsRecord(0));
@@ -149,7 +148,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
O << IndentStr << "static const MCPhysReg RegList" << RegListNumber
<< "[] = {\n";
O << IndentStr << " ";
- for (unsigned i = 0, e = RegList->getSize(); i != e; ++i) {
+ for (unsigned i = 0, e = RegList->size(); i != e; ++i) {
if (i != 0) O << ", ";
O << getQualifiedName(RegList->getElementAsRecord(i));
@@ -158,7 +157,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
O << IndentStr << "static const MCPhysReg RegList"
<< ShadowRegListNumber << "[] = {\n";
O << IndentStr << " ";
- for (unsigned i = 0, e = ShadowRegList->getSize(); i != e; ++i) {
+ for (unsigned i = 0, e = ShadowRegList->size(); i != e; ++i) {
if (i != 0) O << ", ";
O << getQualifiedName(ShadowRegList->getElementAsRecord(i));
@@ -206,7 +205,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
O << IndentStr << "static const MCPhysReg ShadowRegList"
<< ShadowRegListNumber << "[] = {\n";
O << IndentStr << " ";
- for (unsigned i = 0, e = ShadowRegList->getSize(); i != e; ++i) {
+ for (unsigned i = 0, e = ShadowRegList->size(); i != e; ++i) {
if (i != 0) O << ", ";
O << getQualifiedName(ShadowRegList->getElementAsRecord(i));
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index fd02bbd..fa6fd43 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -842,8 +842,8 @@ getPatternComplexity(const CodeGenDAGPatterns &CGP) const {
std::string PatternToMatch::getPredicateCheck() const {
std::string PredicateCheck;
- for (unsigned i = 0, e = Predicates->getSize(); i != e; ++i) {
- if (DefInit *Pred = dyn_cast<DefInit>(Predicates->getElement(i))) {
+ for (Init *I : Predicates->getValues()) {
+ if (DefInit *Pred = dyn_cast<DefInit>(I)) {
Record *Def = Pred->getDef();
if (!Def->isSubClassOf("Predicate")) {
#ifndef NDEBUG
@@ -1999,8 +1999,8 @@ bool TreePatternNode::canPatternMatch(std::string &Reason,
TreePattern::TreePattern(Record *TheRec, ListInit *RawPat, bool isInput,
CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp),
isInputPattern(isInput), HasError(false) {
- for (unsigned i = 0, e = RawPat->getSize(); i != e; ++i)
- Trees.push_back(ParseTreePattern(RawPat->getElement(i), ""));
+ for (Init *I : RawPat->getValues())
+ Trees.push_back(ParseTreePattern(I, ""));
TreePattern::TreePattern(Record *TheRec, DagInit *Pat, bool isInput,
@@ -2860,8 +2860,8 @@ static bool hasNullFragReference(DagInit *DI) {
/// hasNullFragReference - Return true if any DAG in the list references
/// the null_frag operator.
static bool hasNullFragReference(ListInit *LI) {
- for (unsigned i = 0, e = LI->getSize(); i != e; ++i) {
- DagInit *DI = dyn_cast<DagInit>(LI->getElement(i));
+ for (Init *I : LI->getValues()) {
+ DagInit *DI = dyn_cast<DagInit>(I);
assert(DI && "non-dag in an instruction Pattern list?!");
if (hasNullFragReference(DI))
return true;
@@ -3798,13 +3798,11 @@ void CodeGenDAGPatterns::GenerateVariants() {
if (AlreadyExists) continue;
// Otherwise, add it to the list of patterns we have.
- PatternsToMatch.
- push_back(PatternToMatch(PatternsToMatch[i].getSrcRecord(),
- PatternsToMatch[i].getPredicates(),
- Variant, PatternsToMatch[i].getDstPattern(),
- PatternsToMatch[i].getDstRegs(),
- PatternsToMatch[i].getAddedComplexity(),
- Record::getNewUID()));
+ PatternsToMatch.emplace_back(
+ PatternsToMatch[i].getSrcRecord(), PatternsToMatch[i].getPredicates(),
+ Variant, PatternsToMatch[i].getDstPattern(),
+ PatternsToMatch[i].getDstRegs(),
+ PatternsToMatch[i].getAddedComplexity(), Record::getNewUID());
DEBUG(errs() << "\n");
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 1060296..e83d503 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -115,9 +115,9 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
PrintFatalError("In instruction '" + R->getName() + "', operand #" +
Twine(i) + " has the same name as a previous operand!");
- OperandList.push_back(OperandInfo(Rec, ArgName, PrintMethod, EncoderMethod,
- OperandNamespace + "::" + OperandType,
- MIOperandNo, NumOps, MIOpInfo));
+ OperandList.emplace_back(Rec, ArgName, PrintMethod, EncoderMethod,
+ OperandNamespace + "::" + OperandType, MIOperandNo,
+ NumOps, MIOpInfo);
MIOperandNo += NumOps;
@@ -320,6 +320,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
isRegSequence = R->getValueAsBit("isRegSequence");
isExtractSubreg = R->getValueAsBit("isExtractSubreg");
isInsertSubreg = R->getValueAsBit("isInsertSubreg");
+ isConvergent = R->getValueAsBit("isConvergent");
bool Unset;
mayLoad = R->getValueAsBitOrUnset("mayLoad", Unset);
@@ -641,9 +642,9 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, unsigned Variant,
// Take care to instantiate each of the suboperands with the correct
// nomenclature: $
- ResultOperands.push_back(
- ResultOperand(Result->getArgName(AliasOpNo) + "." +
- MIOI->getArgName(SubOp), SubRec));
+ ResultOperands.emplace_back(Result->getArgName(AliasOpNo) + "." +
+ MIOI->getArgName(SubOp),
+ SubRec);
ResultInstOperandIndex.push_back(std::make_pair(i, SubOp));
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index bdbe546..8f01abd 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -14,9 +14,10 @@
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineValueType.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/SMLoc.h"
#include <string>
#include <utility>
#include <vector>
@@ -255,6 +256,7 @@ namespace llvm {
bool isRegSequence : 1;
bool isExtractSubreg : 1;
bool isInsertSubreg : 1;
+ bool isConvergent : 1;
std::string DeprecatedReason;
bool HasComplexDeprecationPredicate;
diff --git a/utils/TableGen/CodeGenMapTable.cpp b/utils/TableGen/CodeGenMapTable.cpp
index b52a91d..48df439 100644
--- a/utils/TableGen/CodeGenMapTable.cpp
+++ b/utils/TableGen/CodeGenMapTable.cpp
@@ -132,12 +132,12 @@ public:
PrintFatalError(MapRec->getLoc(), "InstrMapping record `" +
MapRec->getName() + "' has empty " + "`ValueCols' field!");
- for (unsigned i = 0, e = ColValList->getSize(); i < e; i++) {
- ListInit *ColI = dyn_cast<ListInit>(ColValList->getElement(i));
+ for (Init *I : ColValList->getValues()) {
+ ListInit *ColI = dyn_cast<ListInit>(I);
// Make sure that all the sub-lists in 'ValueCols' have same number of
// elements as the fields in 'ColFields'.
- if (ColI->getSize() != ColFields->getSize())
+ if (ColI->size() != ColFields->size())
PrintFatalError(MapRec->getLoc(), "Record `" + MapRec->getName() +
"', field `ValueCols' entries don't match with " +
" the entries in 'ColFields'!");
@@ -239,13 +239,11 @@ public:
void MapTableEmitter::buildRowInstrMap() {
- for (unsigned i = 0, e = InstrDefs.size(); i < e; i++) {
- Record *CurInstr = InstrDefs[i];
+ for (Record *CurInstr : InstrDefs) {
std::vector<Init*> KeyValue;
ListInit *RowFields = InstrMapDesc.getRowFields();
- for (unsigned j = 0, endRF = RowFields->getSize(); j < endRF; j++) {
- Init *RowFieldsJ = RowFields->getElement(j);
- Init *CurInstrVal = CurInstr->getValue(RowFieldsJ)->getValue();
+ for (Init *RowField : RowFields->getValues()) {
+ Init *CurInstrVal = CurInstr->getValue(RowField)->getValue();
@@ -269,7 +267,7 @@ bool MapTableEmitter::isKeyColInstr(Record* CurInstr) {
// Check if the instruction is a KeyCol instruction.
bool MatchFound = true;
- for (unsigned j = 0, endCF = ColFields->getSize();
+ for (unsigned j = 0, endCF = ColFields->size();
(j < endCF) && MatchFound; j++) {
RecordVal *ColFieldName = CurInstr->getValue(ColFields->getElement(j));
std::string CurInstrVal = ColFieldName->getValue()->getAsUnquotedString();
@@ -289,8 +287,7 @@ void MapTableEmitter::buildMapTable() {
// constraints.
const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols();
unsigned NumOfCols = ValueCols.size();
- for (unsigned j = 0, endKI = KeyInstrVec.size(); j < endKI; j++) {
- Record *CurKeyInstr = KeyInstrVec[j];
+ for (Record *CurKeyInstr : KeyInstrVec) {
std::vector<Record*> ColInstrVec(NumOfCols);
// Find the column instruction based on the constraints for the column.
@@ -313,9 +310,8 @@ Record *MapTableEmitter::getInstrForColumn(Record *KeyInstr,
std::vector<Init*> KeyValue;
// Construct KeyValue using KeyInstr's values for RowFields.
- for (unsigned j = 0, endRF = RowFields->getSize(); j < endRF; j++) {
- Init *RowFieldsJ = RowFields->getElement(j);
- Init *KeyInstrVal = KeyInstr->getValue(RowFieldsJ)->getValue();
+ for (Init *RowField : RowFields->getValues()) {
+ Init *KeyInstrVal = KeyInstr->getValue(RowField)->getValue();
@@ -331,7 +327,7 @@ Record *MapTableEmitter::getInstrForColumn(Record *KeyInstr,
for (unsigned i = 0, e = RelatedInstrVec.size(); i < e; i++) {
bool MatchFound = true;
Record *CurInstr = RelatedInstrVec[i];
- for (unsigned j = 0, endCF = ColFields->getSize();
+ for (unsigned j = 0, endCF = ColFields->size();
(j < endCF) && MatchFound; j++) {
Init *ColFieldJ = ColFields->getElement(j);
Init *CurInstrInit = CurInstr->getValue(ColFieldJ)->getValue();
@@ -443,12 +439,12 @@ void MapTableEmitter::emitMapFuncBody(raw_ostream &OS,
if (ValueCols.size() > 1) {
for (unsigned i = 0, e = ValueCols.size(); i < e; i++) {
ListInit *ColumnI = ValueCols[i];
- for (unsigned j = 0, ColSize = ColumnI->getSize(); j < ColSize; j++) {
+ for (unsigned j = 0, ColSize = ColumnI->size(); j < ColSize; ++j) {
std::string ColName = ColFields->getElement(j)->getAsUnquotedString();
OS << " if (in" << ColName;
OS << " == ";
OS << ColName << "_" << ColumnI->getElement(j)->getAsUnquotedString();
- if (j < ColumnI->getSize() - 1) OS << " && ";
+ if (j < ColumnI->size() - 1) OS << " && ";
else OS << ")\n";
OS << " return " << InstrMapDesc.getName();
@@ -478,8 +474,8 @@ void MapTableEmitter::emitTablesWithFunc(raw_ostream &OS) {
OS << "// "<< InstrMapDesc.getName() << "\n";
OS << "int "<< InstrMapDesc.getName() << "(uint16_t Opcode";
if (ValueCols.size() > 1) {
- for (unsigned i = 0, e = ColFields->getSize(); i < e; i++) {
- std::string ColName = ColFields->getElement(i)->getAsUnquotedString();
+ for (Init *CF : ColFields->getValues()) {
+ std::string ColName = CF->getAsUnquotedString();
OS << ", enum " << ColName << " in" << ColName << ") {\n";
} else { OS << ") {\n"; }
@@ -509,18 +505,18 @@ static void emitEnums(raw_ostream &OS, RecordKeeper &Records) {
ColFields = CurMap->getValueAsListInit("ColFields");
ListInit *List = CurMap->getValueAsListInit("ValueCols");
std::vector<ListInit*> ValueCols;
- unsigned ListSize = List->getSize();
+ unsigned ListSize = List->size();
for (unsigned j = 0; j < ListSize; j++) {
ListInit *ListJ = dyn_cast<ListInit>(List->getElement(j));
- if (ListJ->getSize() != ColFields->getSize())
+ if (ListJ->size() != ColFields->size())
PrintFatalError("Record `" + CurMap->getName() + "', field "
"`ValueCols' entries don't match with the entries in 'ColFields' !");
- for (unsigned j = 0, endCF = ColFields->getSize(); j < endCF; j++) {
+ for (unsigned j = 0, endCF = ColFields->size(); j < endCF; j++) {
for (unsigned k = 0; k < ListSize; k++){
std::string ColName = ColFields->getElement(j)->getAsUnquotedString();
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index c6940e9..c9e6d1d 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -543,7 +543,7 @@ struct TupleExpander : SetTheory::Expander {
std::vector<Record*> Indices = Def->getValueAsListOfDefs("SubRegIndices");
unsigned Dim = Indices.size();
ListInit *SubRegs = Def->getValueAsListInit("SubRegs");
- if (Dim != SubRegs->getSize())
+ if (Dim != SubRegs->size())
PrintFatalError(Def->getLoc(), "SubRegIndices and SubRegs size mismatch");
if (Dim < 2)
@@ -676,7 +676,7 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
// Allocation order 0 is the full set. AltOrders provides others.
const SetTheory::RecVec *Elements = RegBank.getSets().expand(R);
ListInit *AltOrders = R->getValueAsListInit("AltOrders");
- Orders.resize(1 + AltOrders->getSize());
+ Orders.resize(1 + AltOrders->size());
// Default allocation order always contains all registers.
for (unsigned i = 0, e = Elements->size(); i != e; ++i) {
@@ -689,7 +689,7 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
// Alternative allocation orders may be subsets.
SetTheory::RecSet Order;
- for (unsigned i = 0, e = AltOrders->getSize(); i != e; ++i) {
+ for (unsigned i = 0, e = AltOrders->size(); i != e; ++i) {
RegBank.getSets().evaluate(AltOrders->getElement(i), Order, R->getLoc());
Orders[1 + i].append(Order.begin(), Order.end());
// Verify that all altorder members are regclass members.
@@ -994,7 +994,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) {
// Allocate user-defined register classes.
for (auto *RC : RCs) {
- RegClasses.push_back(CodeGenRegisterClass(*this, RC));
+ RegClasses.emplace_back(*this, RC);
@@ -1056,7 +1056,7 @@ CodeGenRegBank::getOrCreateSubClass(const CodeGenRegisterClass *RC,
return FoundI->second;
// Sub-class doesn't exist, create a new one.
- RegClasses.push_back(CodeGenRegisterClass(*this, Name, K));
+ RegClasses.emplace_back(*this, Name, K);
return &RegClasses.back();
diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp
index 58363e8..bc27481 100644
--- a/utils/TableGen/CodeGenSchedule.cpp
+++ b/utils/TableGen/CodeGenSchedule.cpp
@@ -145,8 +145,7 @@ void CodeGenSchedModels::collectProcModels() {
// Use idx=0 for NoModel/NoItineraries.
Record *NoModelDef = Records.getDef("NoSchedModel");
Record *NoItinsDef = Records.getDef("NoItineraries");
- ProcModels.push_back(CodeGenProcModel(0, "NoSchedModel",
- NoModelDef, NoItinsDef));
+ ProcModels.emplace_back(0, "NoSchedModel", NoModelDef, NoItinsDef);
ProcModelMap[NoModelDef] = 0;
// For each processor, find a unique machine model.
@@ -164,16 +163,14 @@ void CodeGenSchedModels::addProcModel(Record *ProcDef) {
std::string Name = ModelKey->getName();
if (ModelKey->isSubClassOf("SchedMachineModel")) {
Record *ItinsDef = ModelKey->getValueAsDef("Itineraries");
- ProcModels.push_back(
- CodeGenProcModel(ProcModels.size(), Name, ModelKey, ItinsDef));
+ ProcModels.emplace_back(ProcModels.size(), Name, ModelKey, ItinsDef);
else {
// An itinerary is defined without a machine model. Infer a new model.
if (!ModelKey->getValueAsListOfDefs("IID").empty())
Name = Name + "Model";
- ProcModels.push_back(
- CodeGenProcModel(ProcModels.size(), Name,
- ProcDef->getValueAsDef("SchedModel"), ModelKey));
+ ProcModels.emplace_back(ProcModels.size(), Name,
+ ProcDef->getValueAsDef("SchedModel"), ModelKey);
@@ -281,12 +278,12 @@ void CodeGenSchedModels::collectSchedRW() {
std::sort(SWDefs.begin(), SWDefs.end(), LessRecord());
for (RecIter SWI = SWDefs.begin(), SWE = SWDefs.end(); SWI != SWE; ++SWI) {
assert(!getSchedRWIdx(*SWI, /*IsRead=*/false) && "duplicate SchedWrite");
- SchedWrites.push_back(CodeGenSchedRW(SchedWrites.size(), *SWI));
+ SchedWrites.emplace_back(SchedWrites.size(), *SWI);
std::sort(SRDefs.begin(), SRDefs.end(), LessRecord());
for (RecIter SRI = SRDefs.begin(), SRE = SRDefs.end(); SRI != SRE; ++SRI) {
assert(!getSchedRWIdx(*SRI, /*IsRead-*/true) && "duplicate SchedWrite");
- SchedReads.push_back(CodeGenSchedRW(SchedReads.size(), *SRI));
+ SchedReads.emplace_back(SchedReads.size(), *SRI);
// Initialize WriteSequence vectors.
for (std::vector<CodeGenSchedRW>::iterator WI = SchedWrites.begin(),
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index 0765370..e79a809 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -486,7 +486,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
// Parse the list of return types.
std::vector<MVT::SimpleValueType> OverloadedVTs;
ListInit *TypeList = R->getValueAsListInit("RetTypes");
- for (unsigned i = 0, e = TypeList->getSize(); i != e; ++i) {
+ for (unsigned i = 0, e = TypeList->size(); i != e; ++i) {
Record *TyEl = TypeList->getElementAsRecord(i);
assert(TyEl->isSubClassOf("LLVMType") && "Expected a type!");
MVT::SimpleValueType VT;
@@ -520,7 +520,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
// Parse the list of parameter types.
TypeList = R->getValueAsListInit("ParamTypes");
- for (unsigned i = 0, e = TypeList->getSize(); i != e; ++i) {
+ for (unsigned i = 0, e = TypeList->size(); i != e; ++i) {
Record *TyEl = TypeList->getElementAsRecord(i);
assert(TyEl->isSubClassOf("LLVMType") && "Expected a type!");
MVT::SimpleValueType VT;
@@ -556,7 +556,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
// Parse the intrinsic properties.
ListInit *PropList = R->getValueAsListInit("Properties");
- for (unsigned i = 0, e = PropList->getSize(); i != e; ++i) {
+ for (unsigned i = 0, e = PropList->size(); i != e; ++i) {
Record *Property = PropList->getElementAsRecord(i);
assert(Property->isSubClassOf("IntrinsicProperty") &&
"Expected a property!");
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index 7905b1a..36a2183 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -610,7 +610,7 @@ void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
// A new filter entry begins a new scope for fixup resolution.
- TableInfo.FixupStack.push_back(FixupList());
+ TableInfo.FixupStack.emplace_back();
DecoderTable &Table = TableInfo.Table;
@@ -1113,7 +1113,7 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
ListInit *Predicates =
bool IsFirstEmission = true;
- for (unsigned i = 0; i < Predicates->getSize(); ++i) {
+ for (unsigned i = 0; i < Predicates->size(); ++i) {
Record *Pred = Predicates->getElementAsRecord(i);
if (!Pred->getValue("AssemblerMatcherPredicate"))
@@ -1136,13 +1136,13 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
emitSinglePredicateMatch(o, pairs.first, Emitter->PredicateNamespace);
IsFirstEmission = false;
- return Predicates->getSize() > 0;
+ return !Predicates->empty();
bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
ListInit *Predicates =
- for (unsigned i = 0; i < Predicates->getSize(); ++i) {
+ for (unsigned i = 0; i < Predicates->size(); ++i) {
Record *Pred = Predicates->getElementAsRecord(i);
if (!Pred->getValue("AssemblerMatcherPredicate"))
@@ -1333,7 +1333,7 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
// complex singletons need predicate checks from the first singleton
// to refer forward to the variable filterchooser that follows.
- TableInfo.FixupStack.push_back(FixupList());
+ TableInfo.FixupStack.emplace_back();
emitSingletonTableEntry(TableInfo, Opc);
@@ -1350,7 +1350,7 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
bool mixed) {
- Filters.push_back(Filter(*this, startBit, numBit, true));
+ Filters.emplace_back(*this, startBit, numBit, true);
BestIndex = 0; // Sole Filter instance to choose from.
@@ -1360,9 +1360,9 @@ void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
unsigned BitIndex, bool AllowMixed) {
if (RA == ATTR_MIXED && AllowMixed)
- Filters.push_back(Filter(*this, StartBit, BitIndex - StartBit, true));
+ Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true);
else if (RA == ATTR_ALL_SET && !AllowMixed)
- Filters.push_back(Filter(*this, StartBit, BitIndex - StartBit, false));
+ Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false);
// FilterProcessor scans the well-known encoding bits of the instructions and
@@ -2179,7 +2179,7 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
- TableInfo.FixupStack.push_back(FixupList());
+ TableInfo.FixupStack.emplace_back();
// Any NumToSkip fixups in the top level scope can resolve to the
// OPC_Fail at the end of the table.
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 7b69de5..e242a96 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -475,41 +475,42 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
OS << " { ";
OS << Num << ",\t" << MinOperands << ",\t"
<< Inst.Operands.NumDefs << ",\t"
- << SchedModels.getSchedClassIdx(Inst) << ",\t"
- << Inst.TheDef->getValueAsInt("Size") << ",\t0";
+ << Inst.TheDef->getValueAsInt("Size") << ",\t"
+ << SchedModels.getSchedClassIdx(Inst) << ",\t0";
// Emit all of the target independent flags...
- if (Inst.isPseudo) OS << "|(1<<MCID::Pseudo)";
- if (Inst.isReturn) OS << "|(1<<MCID::Return)";
- if (Inst.isBranch) OS << "|(1<<MCID::Branch)";
- if (Inst.isIndirectBranch) OS << "|(1<<MCID::IndirectBranch)";
- if (Inst.isCompare) OS << "|(1<<MCID::Compare)";
- if (Inst.isMoveImm) OS << "|(1<<MCID::MoveImm)";
- if (Inst.isBitcast) OS << "|(1<<MCID::Bitcast)";
- if (Inst.isSelect) OS << "|(1<<MCID::Select)";
- if (Inst.isBarrier) OS << "|(1<<MCID::Barrier)";
- if (Inst.hasDelaySlot) OS << "|(1<<MCID::DelaySlot)";
- if (Inst.isCall) OS << "|(1<<MCID::Call)";
- if (Inst.canFoldAsLoad) OS << "|(1<<MCID::FoldableAsLoad)";
- if (Inst.mayLoad) OS << "|(1<<MCID::MayLoad)";
- if (Inst.mayStore) OS << "|(1<<MCID::MayStore)";
- if (Inst.isPredicable) OS << "|(1<<MCID::Predicable)";
- if (Inst.isConvertibleToThreeAddress) OS << "|(1<<MCID::ConvertibleTo3Addr)";
- if (Inst.isCommutable) OS << "|(1<<MCID::Commutable)";
- if (Inst.isTerminator) OS << "|(1<<MCID::Terminator)";
- if (Inst.isReMaterializable) OS << "|(1<<MCID::Rematerializable)";
- if (Inst.isNotDuplicable) OS << "|(1<<MCID::NotDuplicable)";
- if (Inst.Operands.hasOptionalDef) OS << "|(1<<MCID::HasOptionalDef)";
- if (Inst.usesCustomInserter) OS << "|(1<<MCID::UsesCustomInserter)";
- if (Inst.hasPostISelHook) OS << "|(1<<MCID::HasPostISelHook)";
- if (Inst.Operands.isVariadic)OS << "|(1<<MCID::Variadic)";
- if (Inst.hasSideEffects) OS << "|(1<<MCID::UnmodeledSideEffects)";
- if (Inst.isAsCheapAsAMove) OS << "|(1<<MCID::CheapAsAMove)";
- if (Inst.hasExtraSrcRegAllocReq) OS << "|(1<<MCID::ExtraSrcRegAllocReq)";
- if (Inst.hasExtraDefRegAllocReq) OS << "|(1<<MCID::ExtraDefRegAllocReq)";
- if (Inst.isRegSequence) OS << "|(1<<MCID::RegSequence)";
- if (Inst.isExtractSubreg) OS << "|(1<<MCID::ExtractSubreg)";
- if (Inst.isInsertSubreg) OS << "|(1<<MCID::InsertSubreg)";
+ if (Inst.isPseudo) OS << "|(1ULL<<MCID::Pseudo)";
+ if (Inst.isReturn) OS << "|(1ULL<<MCID::Return)";
+ if (Inst.isBranch) OS << "|(1ULL<<MCID::Branch)";
+ if (Inst.isIndirectBranch) OS << "|(1ULL<<MCID::IndirectBranch)";
+ if (Inst.isCompare) OS << "|(1ULL<<MCID::Compare)";
+ if (Inst.isMoveImm) OS << "|(1ULL<<MCID::MoveImm)";
+ if (Inst.isBitcast) OS << "|(1ULL<<MCID::Bitcast)";
+ if (Inst.isSelect) OS << "|(1ULL<<MCID::Select)";
+ if (Inst.isBarrier) OS << "|(1ULL<<MCID::Barrier)";
+ if (Inst.hasDelaySlot) OS << "|(1ULL<<MCID::DelaySlot)";
+ if (Inst.isCall) OS << "|(1ULL<<MCID::Call)";
+ if (Inst.canFoldAsLoad) OS << "|(1ULL<<MCID::FoldableAsLoad)";
+ if (Inst.mayLoad) OS << "|(1ULL<<MCID::MayLoad)";
+ if (Inst.mayStore) OS << "|(1ULL<<MCID::MayStore)";
+ if (Inst.isPredicable) OS << "|(1ULL<<MCID::Predicable)";
+ if (Inst.isConvertibleToThreeAddress) OS << "|(1ULL<<MCID::ConvertibleTo3Addr)";
+ if (Inst.isCommutable) OS << "|(1ULL<<MCID::Commutable)";
+ if (Inst.isTerminator) OS << "|(1ULL<<MCID::Terminator)";
+ if (Inst.isReMaterializable) OS << "|(1ULL<<MCID::Rematerializable)";
+ if (Inst.isNotDuplicable) OS << "|(1ULL<<MCID::NotDuplicable)";
+ if (Inst.Operands.hasOptionalDef) OS << "|(1ULL<<MCID::HasOptionalDef)";
+ if (Inst.usesCustomInserter) OS << "|(1ULL<<MCID::UsesCustomInserter)";
+ if (Inst.hasPostISelHook) OS << "|(1ULL<<MCID::HasPostISelHook)";
+ if (Inst.Operands.isVariadic)OS << "|(1ULL<<MCID::Variadic)";
+ if (Inst.hasSideEffects) OS << "|(1ULL<<MCID::UnmodeledSideEffects)";
+ if (Inst.isAsCheapAsAMove) OS << "|(1ULL<<MCID::CheapAsAMove)";
+ if (Inst.hasExtraSrcRegAllocReq) OS << "|(1ULL<<MCID::ExtraSrcRegAllocReq)";
+ if (Inst.hasExtraDefRegAllocReq) OS << "|(1ULL<<MCID::ExtraDefRegAllocReq)";
+ if (Inst.isRegSequence) OS << "|(1ULL<<MCID::RegSequence)";
+ if (Inst.isExtractSubreg) OS << "|(1ULL<<MCID::ExtractSubreg)";
+ if (Inst.isInsertSubreg) OS << "|(1ULL<<MCID::InsertSubreg)";
+ if (Inst.isConvergent) OS << "|(1ULL<<MCID::Convergent)";
// Emit all of the target-specific flags...
BitsInit *TSF = Inst.TheDef->getValueAsBitsInit("TSFlags");
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index 3f62f20..2b59ee6 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -760,7 +760,7 @@ static void EmitTargetBuiltins(const std::map<std::string, std::string> &BIM,
E = BIM.end(); I != E; ++I) {
std::string ResultCode =
"return " + TargetPrefix + "Intrinsic::" + I->second + ";";
- Results.push_back(StringMatcher::StringPair(I->first, ResultCode));
+ Results.emplace_back(I->first, ResultCode);
StringMatcher("BuiltinName", Results, OS).Emit();
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index ae461bc..dde21c6 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -1033,6 +1033,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("vy64mem", TYPE_M64)
TYPE("vy64xmem", TYPE_M64)
TYPE("vz64mem", TYPE_M64)
errs() << "Unhandled type string " << s << "\n";
llvm_unreachable("Unhandled type string");
@@ -1102,6 +1103,7 @@ RecognizableInstr::rmRegisterEncodingFromString(const std::string &s,
errs() << "Unhandled R/M register encoding " << s << "\n";
llvm_unreachable("Unhandled R/M register encoding");
@@ -1141,6 +1143,7 @@ RecognizableInstr::roRegisterEncodingFromString(const std::string &s,
errs() << "Unhandled reg/opcode register encoding " << s << "\n";
llvm_unreachable("Unhandled reg/opcode register encoding");
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index f1734ec..70382b4 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -513,6 +513,15 @@ def parseIntegratedTestScript(test, normalize_slashes=False,
return lit.Test.Result(Test.UNSUPPORTED,
"Test is unsupported with the following features: %s" % msg)
+ if test.config.limit_to_features:
+ # Check that we have one of the limit_to_features features in requires.
+ limit_to_features_tests = [f for f in test.config.limit_to_features
+ if f in requires]
+ if not limit_to_features_tests:
+ msg = ', '.join(test.config.limit_to_features)
+ return lit.Test.Result(Test.UNSUPPORTED,
+ "Test requires one of the limit_to_features features %s" % msg)
return script,tmpBase,execdir
def _runShTest(test, litConfig, useExternalSh,
diff --git a/utils/lit/lit/TestingConfig.py b/utils/lit/lit/TestingConfig.py
index c7ef94d..1d51c1c 100644
--- a/utils/lit/lit/TestingConfig.py
+++ b/utils/lit/lit/TestingConfig.py
@@ -118,7 +118,7 @@ class TestingConfig:
def __init__(self, parent, name, suffixes, test_format,
environment, substitutions, unsupported,
test_exec_root, test_source_root, excludes,
- available_features, pipefail):
+ available_features, pipefail, limit_to_features = []):
self.parent = parent
self.name = str(name)
self.suffixes = set(suffixes)
@@ -131,6 +131,10 @@ class TestingConfig:
self.excludes = set(excludes)
self.available_features = set(available_features)
self.pipefail = pipefail
+ # This list is used by TestRunner.py to restrict running only tests that
+ # require one of the features in this list if this list is non-empty.
+ # Configurations can set this list to restrict the set of tests to run.
+ self.limit_to_features = set(limit_to_features)
def finish(self, litConfig):
"""finish() - Finish this config object, after loading is complete."""
diff --git a/utils/release/tag.sh b/utils/release/tag.sh
index 503413b..8d6cd5f 100755
--- a/utils/release/tag.sh
+++ b/utils/release/tag.sh
@@ -17,7 +17,7 @@ set -e
-projects="llvm cfe dragonegg test-suite compiler-rt libcxx libcxxabi clang-tools-extra polly lldb lld openmp"
+projects="llvm cfe test-suite compiler-rt libcxx libcxxabi clang-tools-extra polly lldb lld openmp"
diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh
index 89519c4..3cb868b 100755
--- a/utils/release/test-release.sh
+++ b/utils/release/test-release.sh
@@ -388,39 +388,27 @@ for Flavor in $Flavors ; do
- llvmCore_de_phase2_objdir=$BuildDir/Phase2/$Flavor/llvmCore-DragonEgg-$Release-$RC.obj
- llvmCore_de_phase2_installdir=$BuildDir/Phase2/$Flavor/llvmCore-DragonEgg-$Release-$RC.install
- llvmCore_de_phase3_objdir=$BuildDir/Phase3/$Flavor/llvmCore-DragonEgg-$Release-$RC.obj
- llvmCore_de_phase3_installdir=$BuildDir/Phase3/$Flavor/llvmCore-DragonEgg-$Release-$RC.install
rm -rf $llvmCore_phase1_objdir
rm -rf $llvmCore_phase1_installdir
rm -rf $llvmCore_phase2_objdir
rm -rf $llvmCore_phase2_installdir
- rm -rf $llvmCore_de_phase2_objdir
- rm -rf $llvmCore_de_phase2_installdir
rm -rf $llvmCore_phase3_objdir
rm -rf $llvmCore_phase3_installdir
- rm -rf $llvmCore_de_phase3_objdir
- rm -rf $llvmCore_de_phase3_installdir
mkdir -p $llvmCore_phase1_objdir
mkdir -p $llvmCore_phase1_installdir
mkdir -p $llvmCore_phase2_objdir
mkdir -p $llvmCore_phase2_installdir
- mkdir -p $llvmCore_de_phase2_objdir
- mkdir -p $llvmCore_de_phase2_installdir
mkdir -p $llvmCore_phase3_objdir
mkdir -p $llvmCore_phase3_installdir
- mkdir -p $llvmCore_de_phase3_objdir
- mkdir -p $llvmCore_de_phase3_installdir
# Phase 1: Build llvmCore and clang
OpenPOWER on IntegriCloud